Added CPU and GPU implementations of a modified version of FTRL, triggered using an attribute, that multiplies the "linear" accumulator by the learning rate. This enables a learning rate of zero to work correctly.

PiperOrigin-RevId: 305311806
Change-Id: Ie2602f4f0208e440fb30cb4968010da7954ffc67
This commit is contained in:
A. Unique TensorFlower 2020-04-07 12:07:52 -07:00 committed by TensorFlower Gardener
parent 287707cc35
commit 2617230e61
8 changed files with 464 additions and 50 deletions

View File

@ -747,8 +747,8 @@ REGISTER_XLA_OP(Name("ResourceApplyCenteredRMSProp")
.TypeConstraint("T", kFloatAndComplexTypes), .TypeConstraint("T", kFloatAndComplexTypes),
ResourceApplyCenteredRMSProp); ResourceApplyCenteredRMSProp);
void CompileFtrl(XlaOpKernelContext* ctx, DataType dtype, void CompileFtrl(XlaOpKernelContext* ctx, DataType dtype, bool has_l2_shrinkage,
bool has_l2_shrinkage) { bool multiply_linear_by_lr) {
xla::XlaBuilder* b = ctx->builder(); xla::XlaBuilder* b = ctx->builder();
TensorShape var_shape, accum_shape, linear_shape; TensorShape var_shape, accum_shape, linear_shape;
@ -840,9 +840,19 @@ void CompileFtrl(XlaOpKernelContext* ctx, DataType dtype,
xla::XlaOp new_accum = accum + xla::Square(grad); xla::XlaOp new_accum = accum + xla::Square(grad);
xla::XlaOp new_accum_lr_pow = xla::Pow(new_accum, -lr_power); xla::XlaOp new_accum_lr_pow = xla::Pow(new_accum, -lr_power);
xla::XlaOp accum_lr_pow = xla::Pow(accum, -lr_power); xla::XlaOp accum_lr_pow = xla::Pow(accum, -lr_power);
linear = linear + grad_to_use - (new_accum_lr_pow - accum_lr_pow) / lr * var; if (multiply_linear_by_lr) {
xla::XlaOp linear_clipped = xla::Clamp(-l1, linear, l1); linear =
xla::XlaOp quadratic = new_accum_lr_pow / lr + two * l2; linear + grad_to_use * lr - (new_accum_lr_pow - accum_lr_pow) * var;
} else {
linear =
linear + grad_to_use - (new_accum_lr_pow - accum_lr_pow) / lr * var;
}
xla::XlaOp linear_clipped =
(multiply_linear_by_lr ? xla::Clamp(-l1 * lr, linear, l1 * lr)
: xla::Clamp(-l1, linear, l1));
xla::XlaOp quadratic =
(multiply_linear_by_lr ? new_accum_lr_pow + two * l2 * lr
: new_accum_lr_pow / lr + two * l2);
var = (linear_clipped - linear) / quadratic; var = (linear_clipped - linear) / quadratic;
accum = new_accum; accum = new_accum;
@ -855,14 +865,20 @@ class ResourceApplyFtrl : public XlaOpKernel {
public: public:
explicit ResourceApplyFtrl(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { explicit ResourceApplyFtrl(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
OP_REQUIRES_OK(ctx, ctx->GetAttr("T", &dtype_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("T", &dtype_));
OP_REQUIRES_OK(
ctx, ctx->GetAttr("multiply_linear_by_lr", &multiply_linear_by_lr_));
} }
void Compile(XlaOpKernelContext* ctx) override { void Compile(XlaOpKernelContext* ctx) override {
CompileFtrl(ctx, dtype_, /*has_l2_shrinkage=*/false); CompileFtrl(ctx, dtype_, /*has_l2_shrinkage=*/false,
/*multiply_linear_by_lr=*/multiply_linear_by_lr_);
} }
private: private:
DataType dtype_; DataType dtype_;
// Whether to keep the "linear" slot variable multiplied by the learning rate.
bool multiply_linear_by_lr_;
}; };
REGISTER_XLA_OP(Name("ResourceApplyFtrl").TypeConstraint("T", kFloatTypes), REGISTER_XLA_OP(Name("ResourceApplyFtrl").TypeConstraint("T", kFloatTypes),
ResourceApplyFtrl); ResourceApplyFtrl);
@ -871,14 +887,20 @@ class ResourceApplyFtrlV2 : public XlaOpKernel {
public: public:
explicit ResourceApplyFtrlV2(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { explicit ResourceApplyFtrlV2(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
OP_REQUIRES_OK(ctx, ctx->GetAttr("T", &dtype_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("T", &dtype_));
OP_REQUIRES_OK(
ctx, ctx->GetAttr("multiply_linear_by_lr", &multiply_linear_by_lr_));
} }
void Compile(XlaOpKernelContext* ctx) override { void Compile(XlaOpKernelContext* ctx) override {
CompileFtrl(ctx, dtype_, /*has_l2_shrinkage=*/true); CompileFtrl(ctx, dtype_, /*has_l2_shrinkage=*/true,
/*multiply_linear_by_lr=*/multiply_linear_by_lr_);
} }
private: private:
DataType dtype_; DataType dtype_;
// Whether to keep the "linear" slot variable multiplied by the learning rate.
bool multiply_linear_by_lr_;
}; };
REGISTER_XLA_OP(Name("ResourceApplyFtrlV2").TypeConstraint("T", kFloatTypes), REGISTER_XLA_OP(Name("ResourceApplyFtrlV2").TypeConstraint("T", kFloatTypes),
ResourceApplyFtrlV2); ResourceApplyFtrlV2);

View File

@ -248,6 +248,47 @@ struct ApplyFtrlV2<CPUDevice, T> {
} }
}; };
// CPU kernel functor for FTRL-V2 (FTRL with L2 shrinkage) in the variant
// where the "linear" slot variable is kept pre-multiplied by the learning
// rate.  Because linear already carries the lr factor, the gradient term is
// scaled by lr() and the l1/l2 thresholds are scaled by lr() as well, so a
// learning rate of zero works correctly (no division by lr anywhere below).
template <typename T>
struct ApplyFtrlV2MultiplyLinearByLr<CPUDevice, T> {
  void operator()(const CPUDevice& d, typename TTypes<T>::Flat var,
                  typename TTypes<T>::Flat accum,
                  typename TTypes<T>::Flat linear,
                  typename TTypes<T>::ConstFlat grad,
                  typename TTypes<T>::ConstScalar lr,
                  typename TTypes<T>::ConstScalar l1,
                  typename TTypes<T>::ConstScalar l2,
                  typename TTypes<T>::ConstScalar l2_shrinkage,
                  typename TTypes<T>::ConstScalar lr_power) {
    // Shrinkage adds 2 * l2_shrinkage * var to the gradient used for the
    // linear update (but NOT for the accumulator update at the end).
    auto grad_with_shrinkage = grad + static_cast<T>(2) * l2_shrinkage() * var;
    // Lazily-evaluated Eigen expression; accum itself is only mutated at the
    // very end, so every use of new_accum below sees the pre-update accum.
    auto new_accum = accum + grad * grad;
    // Special case for lr_power = -0.5: sqrt is cheaper than a generic pow.
    if (lr_power() == static_cast<T>(-0.5)) {
      linear.device(d) +=
          grad_with_shrinkage * lr() - (new_accum.sqrt() - accum.sqrt()) * var;
    } else {
      linear.device(d) +=
          grad_with_shrinkage * lr() -
          (new_accum.pow(-lr_power()) - accum.pow(-lr_power())) * var;
    }
    // x = sign(linear) * (l1 * lr) - linear: numerator of the closed-form
    // FTRL update; the l1 threshold is scaled by lr to match linear's scale.
    auto x = (linear.constant(l1() * lr()) * linear.sign() - linear);
    if (lr_power() == static_cast<T>(-0.5)) {
      auto y =
          new_accum.sqrt() + linear.constant(static_cast<T>(2) * l2() * lr());
      auto pre_shrink = x / y;
      // Weights whose |linear| does not exceed the scaled l1 threshold are
      // clipped to exactly zero (proximal step); otherwise apply pre_shrink.
      var.device(d) = (linear.abs() > linear.constant(l1() * lr()))
                          .select(pre_shrink, var.constant(static_cast<T>(0)));
    } else {
      auto y = new_accum.pow(-lr_power()) +
               linear.constant(static_cast<T>(2) * l2() * lr());
      auto pre_shrink = x / y;
      var.device(d) = (linear.abs() > linear.constant(l1() * lr()))
                          .select(pre_shrink, var.constant(static_cast<T>(0)));
    }
    // Accumulator update uses the raw gradient (no shrinkage term).
    accum.device(d) += grad * grad;
  }
};
template <typename T> template <typename T>
struct ApplyFtrl<CPUDevice, T> { struct ApplyFtrl<CPUDevice, T> {
void operator()(const CPUDevice& d, typename TTypes<T>::Flat var, void operator()(const CPUDevice& d, typename TTypes<T>::Flat var,
@ -286,6 +327,44 @@ struct ApplyFtrl<CPUDevice, T> {
} }
}; };
// CPU kernel functor for FTRL where the "linear" slot variable is kept
// pre-multiplied by the learning rate.  Because linear already carries the
// lr factor, the gradient term is scaled by lr() and the l1/l2 thresholds
// are scaled by lr() too, so a learning rate of zero works correctly (no
// division by lr anywhere below).
template <typename T>
struct ApplyFtrlMultiplyLinearByLr<CPUDevice, T> {
  void operator()(const CPUDevice& d, typename TTypes<T>::Flat var,
                  typename TTypes<T>::Flat accum,
                  typename TTypes<T>::Flat linear,
                  typename TTypes<T>::ConstFlat grad,
                  typename TTypes<T>::ConstScalar lr,
                  typename TTypes<T>::ConstScalar l1,
                  typename TTypes<T>::ConstScalar l2,
                  typename TTypes<T>::ConstScalar lr_power) {
    // Lazily-evaluated Eigen expression; accum itself is only mutated at the
    // very end, so every use of new_accum below sees the pre-update accum.
    auto new_accum = accum + grad.square();
    // Special case for lr_power = -0.5: sqrt is cheaper than a generic pow.
    if (lr_power() == static_cast<T>(-0.5)) {
      linear.device(d) += grad * lr() - (new_accum.sqrt() - accum.sqrt()) * var;
    } else {
      linear.device(d) +=
          grad * lr() -
          (new_accum.pow(-lr_power()) - accum.pow(-lr_power())) * var;
    }
    // Fold the scalar product l1 * lr into the constant for consistency with
    // ApplyFtrlV2MultiplyLinearByLr and with the threshold checks below.
    // (Was `linear.constant(l1()) * lr()`: same value, but it built an extra
    // elementwise multiply node.)
    auto x = (linear.constant(l1() * lr()) * linear.sign() - linear);
    if (lr_power() == static_cast<T>(-0.5)) {
      auto y =
          new_accum.sqrt() + linear.constant(static_cast<T>(2) * l2() * lr());
      auto pre_shrink = x / y;
      // Weights whose |linear| does not exceed the scaled l1 threshold are
      // clipped to exactly zero (proximal step); otherwise apply pre_shrink.
      var.device(d) = (linear.abs() > linear.constant(l1() * lr()))
                          .select(pre_shrink, var.constant(static_cast<T>(0)));
    } else {
      auto y = new_accum.pow(-lr_power()) +
               linear.constant(static_cast<T>(2) * l2() * lr());
      auto pre_shrink = x / y;
      var.device(d) = (linear.abs() > linear.constant(l1() * lr()))
                          .select(pre_shrink, var.constant(static_cast<T>(0)));
    }
    accum.device(d) += grad.square();
  }
};
template <typename T> template <typename T>
struct ApplyMomentum<CPUDevice, T> { struct ApplyMomentum<CPUDevice, T> {
void operator()(const CPUDevice& d, typename TTypes<T>::Flat var, void operator()(const CPUDevice& d, typename TTypes<T>::Flat var,
@ -1556,8 +1635,19 @@ namespace {
template <typename T> template <typename T>
inline T FtrlCompute(const T& accum, const T& linear, const T& lr, const T& l1, inline T FtrlCompute(const T& accum, const T& linear, const T& lr, const T& l1,
const T& l2, const T& lr_power) { const T& l2, const T& lr_power,
const bool multiply_linear_by_lr) {
T quadratic; T quadratic;
if (multiply_linear_by_lr) {
if (lr_power == static_cast<T>(-0.5)) {
quadratic = Eigen::numext::sqrt(accum) + static_cast<T>(2) * l2 * lr;
} else {
quadratic =
Eigen::numext::pow(accum, -lr_power) + static_cast<T>(2) * l2 * lr;
}
auto l1_reg_adjust = std::max(std::min(linear, l1 * lr), -l1 * lr);
return (l1_reg_adjust - linear) / quadratic;
} else {
if (lr_power == static_cast<T>(-0.5)) { if (lr_power == static_cast<T>(-0.5)) {
quadratic = Eigen::numext::sqrt(accum) / lr + static_cast<T>(2) * l2; quadratic = Eigen::numext::sqrt(accum) / lr + static_cast<T>(2) * l2;
} else { } else {
@ -1566,6 +1656,7 @@ inline T FtrlCompute(const T& accum, const T& linear, const T& lr, const T& l1,
} }
auto l1_reg_adjust = std::max(std::min(linear, l1), -l1); auto l1_reg_adjust = std::max(std::min(linear, l1), -l1);
return (l1_reg_adjust - linear) / quadratic; return (l1_reg_adjust - linear) / quadratic;
}
} }
} // namespace } // namespace
@ -2392,6 +2483,8 @@ class ApplyFtrlOp : public OpKernel {
public: public:
explicit ApplyFtrlOp(OpKernelConstruction* ctx) : OpKernel(ctx) { explicit ApplyFtrlOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_));
OP_REQUIRES_OK(
ctx, ctx->GetAttr("multiply_linear_by_lr", &multiply_linear_by_lr_));
} }
void Compute(OpKernelContext* ctx) override { void Compute(OpKernelContext* ctx) override {
@ -2466,10 +2559,22 @@ class ApplyFtrlOp : public OpKernel {
errors::InvalidArgument("l2 shrinkage regularization strength " errors::InvalidArgument("l2 shrinkage regularization strength "
"is not a scalar: ", "is not a scalar: ",
l2_shrinkage.shape().DebugString())); l2_shrinkage.shape().DebugString()));
if (multiply_linear_by_lr_) {
functor::ApplyFtrlV2<Device, T>()( functor::ApplyFtrlV2<Device, T>()(
device, var.flat<T>(), accum.flat<T>(), linear.flat<T>(), device, var.flat<T>(), accum.flat<T>(), linear.flat<T>(),
grad.flat<T>(), lr.scalar<T>(), l1.scalar<T>(), l2.scalar<T>(), grad.flat<T>(), lr.scalar<T>(), l1.scalar<T>(), l2.scalar<T>(),
l2_shrinkage.scalar<T>(), lr_power.scalar<T>()); l2_shrinkage.scalar<T>(), lr_power.scalar<T>());
} else {
functor::ApplyFtrlV2MultiplyLinearByLr<Device, T>()(
device, var.flat<T>(), accum.flat<T>(), linear.flat<T>(),
grad.flat<T>(), lr.scalar<T>(), l1.scalar<T>(), l2.scalar<T>(),
l2_shrinkage.scalar<T>(), lr_power.scalar<T>());
}
} else if (multiply_linear_by_lr_) {
functor::ApplyFtrlMultiplyLinearByLr<Device, T>()(
device, var.flat<T>(), accum.flat<T>(), linear.flat<T>(),
grad.flat<T>(), lr.scalar<T>(), l1.scalar<T>(), l2.scalar<T>(),
lr_power.scalar<T>());
} else { } else {
functor::ApplyFtrl<Device, T>()(device, var.flat<T>(), accum.flat<T>(), functor::ApplyFtrl<Device, T>()(device, var.flat<T>(), accum.flat<T>(),
linear.flat<T>(), grad.flat<T>(), linear.flat<T>(), grad.flat<T>(),
@ -2482,6 +2587,7 @@ class ApplyFtrlOp : public OpKernel {
private: private:
bool use_exclusive_lock_; bool use_exclusive_lock_;
bool multiply_linear_by_lr_;
}; };
#define REGISTER_KERNELS(D, T) \ #define REGISTER_KERNELS(D, T) \
@ -2559,7 +2665,16 @@ namespace functor {
typename TTypes<T>::ConstScalar l1, typename TTypes<T>::ConstScalar l2, \ typename TTypes<T>::ConstScalar l1, typename TTypes<T>::ConstScalar l2, \
typename TTypes<T>::ConstScalar l2_shrinkage, \ typename TTypes<T>::ConstScalar l2_shrinkage, \
typename TTypes<T>::ConstScalar lr_power); \ typename TTypes<T>::ConstScalar lr_power); \
extern template struct ApplyFtrlV2<GPUDevice, T>; extern template struct ApplyFtrlV2<GPUDevice, T>; \
template <> \
void ApplyFtrlV2MultiplyLinearByLr<GPUDevice, T>::operator()( \
const GPUDevice& d, typename TTypes<T>::Flat var, \
typename TTypes<T>::Flat accum, typename TTypes<T>::Flat linear, \
typename TTypes<T>::ConstFlat grad, typename TTypes<T>::ConstScalar lr, \
typename TTypes<T>::ConstScalar l1, typename TTypes<T>::ConstScalar l2, \
typename TTypes<T>::ConstScalar l2_shrinkage, \
typename TTypes<T>::ConstScalar lr_power); \
extern template struct ApplyFtrlV2MultiplyLinearByLr<GPUDevice, T>;
DECLARE_GPU_SPEC(Eigen::half); DECLARE_GPU_SPEC(Eigen::half);
DECLARE_GPU_SPEC(float); DECLARE_GPU_SPEC(float);
DECLARE_GPU_SPEC(double); DECLARE_GPU_SPEC(double);
@ -2579,6 +2694,8 @@ class SparseApplyFtrlOp : public OpKernel {
public: public:
explicit SparseApplyFtrlOp(OpKernelConstruction* ctx) : OpKernel(ctx) { explicit SparseApplyFtrlOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("use_locking", &use_exclusive_lock_));
OP_REQUIRES_OK(
ctx, ctx->GetAttr("multiply_linear_by_lr", &multiply_linear_by_lr_));
} }
void Compute(OpKernelContext* ctx) override TF_NO_THREAD_SAFETY_ANALYSIS { void Compute(OpKernelContext* ctx) override TF_NO_THREAD_SAFETY_ANALYSIS {
@ -2714,6 +2831,17 @@ class SparseApplyFtrlOp : public OpKernel {
// eigen tensor library. // eigen tensor library.
#define COMPUTE_FTRL(grad, grad_maybe_with_shrinkage) \ #define COMPUTE_FTRL(grad, grad_maybe_with_shrinkage) \
auto new_accum = accum + grad.square(); \ auto new_accum = accum + grad.square(); \
if (multiply_linear_by_lr_) { \
if (lr_power_scalar == static_cast<T>(-0.5)) { \
linear += grad_maybe_with_shrinkage * lr_scalar - \
(new_accum.sqrt() - accum.sqrt()) * var; \
} else { \
linear += \
grad_maybe_with_shrinkage * lr_scalar - \
(new_accum.pow(-lr_power_scalar) - accum.pow(-lr_power_scalar)) * \
var; \
} \
} else { \
if (lr_power_scalar == static_cast<T>(-0.5)) { \ if (lr_power_scalar == static_cast<T>(-0.5)) { \
linear += grad_maybe_with_shrinkage - \ linear += grad_maybe_with_shrinkage - \
(new_accum.sqrt() - accum.sqrt()) / lr_scalar * var; \ (new_accum.sqrt() - accum.sqrt()) / lr_scalar * var; \
@ -2722,17 +2850,35 @@ class SparseApplyFtrlOp : public OpKernel {
accum.pow(-lr_power_scalar)) / \ accum.pow(-lr_power_scalar)) / \
lr_scalar * var; \ lr_scalar * var; \
} \ } \
auto l1_reg_adjust = linear.cwiseMin(l1_scalar).cwiseMax(-l1_scalar); \ } \
auto l1_reg_adjust = \
(multiply_linear_by_lr_ \
? linear.cwiseMin(l1_scalar * lr_scalar) \
.cwiseMax(-l1_scalar * lr_scalar) \
: linear.cwiseMin(l1_scalar).cwiseMax(-l1_scalar)); \
auto x = l1_reg_adjust - linear; \ auto x = l1_reg_adjust - linear; \
if (multiply_linear_by_lr_) { \
if (lr_power_scalar == static_cast<T>(-0.5)) { \
auto y = new_accum.sqrt() + \
linear.constant(static_cast<T>(2) * l2_scalar * lr_scalar); \
var = x / y; \
} else { \
auto y = new_accum.pow(-lr_power_scalar) + \
linear.constant(static_cast<T>(2) * l2_scalar * lr_scalar); \
var = x / y; \
} \
} else { \
if (lr_power_scalar == static_cast<T>(-0.5)) { \ if (lr_power_scalar == static_cast<T>(-0.5)) { \
auto y = new_accum.sqrt() / new_accum.constant(lr_scalar) + \ auto y = new_accum.sqrt() / new_accum.constant(lr_scalar) + \
linear.constant(static_cast<T>(2) * l2_scalar); \ linear.constant(static_cast<T>(2) * l2_scalar); \
var = x / y; \ var = x / y; \
} else { \ } else { \
auto y = new_accum.pow(-lr_power_scalar) / new_accum.constant(lr_scalar) + \ auto y = \
new_accum.pow(-lr_power_scalar) / new_accum.constant(lr_scalar) + \
linear.constant(static_cast<T>(2) * l2_scalar); \ linear.constant(static_cast<T>(2) * l2_scalar); \
var = x / y; \ var = x / y; \
} \ } \
} \
accum += grad.square(); accum += grad.square();
if (has_l2_shrinkage) { if (has_l2_shrinkage) {
@ -2781,10 +2927,13 @@ class SparseApplyFtrlOp : public OpKernel {
T updated_a = a + grad_flat(i) * grad_flat(i); T updated_a = a + grad_flat(i) * grad_flat(i);
using Eigen::numext::pow; using Eigen::numext::pow;
T sigma = pow(updated_a, -lr_power_scalar) - pow(a, -lr_power_scalar); T sigma = pow(updated_a, -lr_power_scalar) - pow(a, -lr_power_scalar);
if (!multiply_linear_by_lr_) {
sigma /= lr_scalar; sigma /= lr_scalar;
T updated_l = l + g - sigma * v; }
T updated_l = (multiply_linear_by_lr_ ? l + g * lr_scalar - sigma * v
: l + g - sigma * v);
v = FtrlCompute(updated_a, updated_l, lr_scalar, l1_scalar, l2_scalar, v = FtrlCompute(updated_a, updated_l, lr_scalar, l1_scalar, l2_scalar,
lr_power_scalar); lr_power_scalar, multiply_linear_by_lr_);
a = updated_a; a = updated_a;
l = updated_l; l = updated_l;
} }
@ -2796,6 +2945,7 @@ class SparseApplyFtrlOp : public OpKernel {
private: private:
bool use_exclusive_lock_; bool use_exclusive_lock_;
bool multiply_linear_by_lr_;
}; };
#define REGISTER_KERNELS(T, Tindices) \ #define REGISTER_KERNELS(T, Tindices) \

View File

@ -113,6 +113,18 @@ struct ApplyFtrl {
typename TTypes<T>::ConstScalar lr_power); typename TTypes<T>::ConstScalar lr_power);
}; };
// Functor declaration for the FTRL update in the variant where the "linear"
// slot variable is kept multiplied by the learning rate (selected by the
// multiply_linear_by_lr op attribute).  Device-specific specializations
// (CPU/GPU) provide the implementation.
template <typename Device, typename T>
struct ApplyFtrlMultiplyLinearByLr {
  void operator()(const Device& d, typename TTypes<T>::Flat var,
                  typename TTypes<T>::Flat accum,
                  typename TTypes<T>::Flat linear,
                  typename TTypes<T>::ConstFlat grad,
                  typename TTypes<T>::ConstScalar lr,
                  typename TTypes<T>::ConstScalar l1,
                  typename TTypes<T>::ConstScalar l2,
                  typename TTypes<T>::ConstScalar lr_power);
};
template <typename Device, typename T> template <typename Device, typename T>
struct ApplyFtrlV2 { struct ApplyFtrlV2 {
void operator()(const Device& d, typename TTypes<T>::Flat var, void operator()(const Device& d, typename TTypes<T>::Flat var,
@ -126,6 +138,19 @@ struct ApplyFtrlV2 {
typename TTypes<T>::ConstScalar lr_power); typename TTypes<T>::ConstScalar lr_power);
}; };
// Functor declaration for the FTRL-V2 update (FTRL with L2 shrinkage) in the
// variant where the "linear" slot variable is kept multiplied by the
// learning rate (selected by the multiply_linear_by_lr op attribute).
// Device-specific specializations (CPU/GPU) provide the implementation.
template <typename Device, typename T>
struct ApplyFtrlV2MultiplyLinearByLr {
  void operator()(const Device& d, typename TTypes<T>::Flat var,
                  typename TTypes<T>::Flat accum,
                  typename TTypes<T>::Flat linear,
                  typename TTypes<T>::ConstFlat grad,
                  typename TTypes<T>::ConstScalar lr,
                  typename TTypes<T>::ConstScalar l1,
                  typename TTypes<T>::ConstScalar l2,
                  typename TTypes<T>::ConstScalar l2_shrinkage,
                  typename TTypes<T>::ConstScalar lr_power);
};
template <typename Device, typename T> template <typename Device, typename T>
struct ApplyMomentum { struct ApplyMomentum {
void operator()(const Device& d, typename TTypes<T>::Flat var, void operator()(const Device& d, typename TTypes<T>::Flat var,

View File

@ -215,6 +215,41 @@ struct ApplyFtrl<GPUDevice, T> {
} }
}; };
// GPU functor for FTRL where the "linear" slot is kept pre-multiplied by the
// learning rate.  The scalar inputs (lr, l1, l2, lr_power) are reshaped to
// size-1 and broadcast to the length of `grad` so they can be combined
// elementwise with the flat tensors in a single fused device expression.
template <typename T>
struct ApplyFtrlMultiplyLinearByLr<GPUDevice, T> {
  void operator()(const GPUDevice& d, typename TTypes<T>::Flat var,
                  typename TTypes<T>::Flat accum,
                  typename TTypes<T>::Flat linear,
                  typename TTypes<T>::ConstFlat grad,
                  typename TTypes<T>::ConstScalar lr,
                  typename TTypes<T>::ConstScalar l1,
                  typename TTypes<T>::ConstScalar l2,
                  typename TTypes<T>::ConstScalar lr_power) {
    Eigen::array<typename TTypes<T>::Tensor::Index, 1> bcast;
    bcast[0] = grad.dimension(0);
    Eigen::Sizes<1> single;
    auto lr_bcast = lr.reshape(single).broadcast(bcast);
    // l1 and l2 are pre-scaled by lr to match the lr-scaled linear slot.
    auto l1_lr_bcast = (l1 * lr).reshape(single).broadcast(bcast);
    auto l2_lr_bcast = (l2 * lr).reshape(single).broadcast(bcast);
    // Negated up front so pow(accum, -lr_power) can be written as a
    // binaryExpr against the broadcast exponent.
    auto lr_power_bcast = -lr_power.reshape(single).broadcast(bcast);
    const auto two = static_cast<T>(2.0);
    // Lazy expression; accum itself is only mutated at the very end.
    auto new_accum = accum + grad.square();
    auto accum_power = accum.binaryExpr(lr_power_bcast,
                                        Eigen::internal::scalar_pow_op<T, T>());
    auto new_accum_power = new_accum.binaryExpr(
        lr_power_bcast, Eigen::internal::scalar_pow_op<T, T>());
    // linear += grad * lr - (new_accum^(-p) - accum^(-p)) * var
    linear.device(d) += grad * lr_bcast - (new_accum_power - accum_power) * var;
    // Closed-form FTRL update with lr-scaled l1/l2 thresholds.
    auto x = (l1_lr_bcast * linear.sign() - linear);
    auto y = new_accum_power + linear.constant(two) * l2_lr_bcast;
    auto pre_shrink = x / y;
    // Weights with |linear| <= l1*lr are clipped to exactly zero.
    var.device(d) = (linear.abs() > l1_lr_bcast)
                        .select(pre_shrink, var.constant(static_cast<T>(0)));
    accum.device(d) += grad.square();
  }
};
template <typename T> template <typename T>
struct ApplyFtrlV2<GPUDevice, T> { struct ApplyFtrlV2<GPUDevice, T> {
void operator()(const GPUDevice& d, typename TTypes<T>::Flat var, void operator()(const GPUDevice& d, typename TTypes<T>::Flat var,
@ -255,6 +290,46 @@ struct ApplyFtrlV2<GPUDevice, T> {
} }
}; };
// GPU functor for FTRL-V2 (FTRL with L2 shrinkage) where the "linear" slot
// is kept pre-multiplied by the learning rate.  Scalar inputs are reshaped
// to size-1 and broadcast to the length of `grad` so they combine
// elementwise with the flat tensors in fused device expressions.
template <typename T>
struct ApplyFtrlV2MultiplyLinearByLr<GPUDevice, T> {
  void operator()(const GPUDevice& d, typename TTypes<T>::Flat var,
                  typename TTypes<T>::Flat accum,
                  typename TTypes<T>::Flat linear,
                  typename TTypes<T>::ConstFlat grad,
                  typename TTypes<T>::ConstScalar lr,
                  typename TTypes<T>::ConstScalar l1,
                  typename TTypes<T>::ConstScalar l2,
                  typename TTypes<T>::ConstScalar l2_shrinkage,
                  typename TTypes<T>::ConstScalar lr_power) {
    Eigen::array<typename TTypes<T>::Tensor::Index, 1> bcast;
    bcast[0] = grad.dimension(0);
    Eigen::Sizes<1> single;
    auto l2_shrinkage_bcast = l2_shrinkage.reshape(single).broadcast(bcast);
    auto lr_bcast = lr.reshape(single).broadcast(bcast);
    // l1 and l2 are pre-scaled by lr to match the lr-scaled linear slot.
    auto l1_lr_bcast = (l1 * lr).reshape(single).broadcast(bcast);
    auto l2_lr_bcast = (l2 * lr).reshape(single).broadcast(bcast);
    // Negated up front so pow(accum, -lr_power) can be written as a
    // binaryExpr against the broadcast exponent.
    auto lr_power_bcast = -lr_power.reshape(single).broadcast(bcast);
    const auto two = static_cast<T>(2.0);
    // Lazy expression; accum itself is only mutated at the very end.
    auto new_accum = accum + grad.square();
    auto accum_power = accum.binaryExpr(lr_power_bcast,
                                        Eigen::internal::scalar_pow_op<T, T>());
    auto new_accum_power = new_accum.binaryExpr(
        lr_power_bcast, Eigen::internal::scalar_pow_op<T, T>());
    // Shrinkage adds 2 * l2_shrinkage * var to the gradient used for the
    // linear update (but NOT for the accumulator update at the end).
    auto grad_with_shrinkage =
        grad + (var.constant(two) * l2_shrinkage_bcast * var);
    linear.device(d) +=
        grad_with_shrinkage * lr_bcast - (new_accum_power - accum_power) * var;
    // Closed-form FTRL update with lr-scaled l1/l2 thresholds.
    auto x = (l1_lr_bcast * linear.sign() - linear);
    auto y = new_accum_power + linear.constant(two) * l2_lr_bcast;
    auto pre_shrink = x / y;
    // Weights with |linear| <= l1*lr are clipped to exactly zero.
    var.device(d) = (linear.abs() > l1_lr_bcast)
                        .select(pre_shrink, var.constant(static_cast<T>(0)));
    accum.device(d) += grad.square();
  }
};
template <typename T> template <typename T>
struct ApplyMomentum<GPUDevice, T> { struct ApplyMomentum<GPUDevice, T> {
void operator()(const GPUDevice& d, typename TTypes<T>::Flat var, void operator()(const GPUDevice& d, typename TTypes<T>::Flat var,
@ -565,10 +640,18 @@ template struct functor::ApplyFtrl<GPUDevice, Eigen::half>;
template struct functor::ApplyFtrl<GPUDevice, float>; template struct functor::ApplyFtrl<GPUDevice, float>;
template struct functor::ApplyFtrl<GPUDevice, double>; template struct functor::ApplyFtrl<GPUDevice, double>;
template struct functor::ApplyFtrlMultiplyLinearByLr<GPUDevice, Eigen::half>;
template struct functor::ApplyFtrlMultiplyLinearByLr<GPUDevice, float>;
template struct functor::ApplyFtrlMultiplyLinearByLr<GPUDevice, double>;
template struct functor::ApplyFtrlV2<GPUDevice, Eigen::half>; template struct functor::ApplyFtrlV2<GPUDevice, Eigen::half>;
template struct functor::ApplyFtrlV2<GPUDevice, float>; template struct functor::ApplyFtrlV2<GPUDevice, float>;
template struct functor::ApplyFtrlV2<GPUDevice, double>; template struct functor::ApplyFtrlV2<GPUDevice, double>;
template struct functor::ApplyFtrlV2MultiplyLinearByLr<GPUDevice, Eigen::half>;
template struct functor::ApplyFtrlV2MultiplyLinearByLr<GPUDevice, float>;
template struct functor::ApplyFtrlV2MultiplyLinearByLr<GPUDevice, double>;
template struct functor::ApplyMomentum<GPUDevice, Eigen::half>; template struct functor::ApplyMomentum<GPUDevice, Eigen::half>;
template struct functor::ApplyMomentum<GPUDevice, float>; template struct functor::ApplyMomentum<GPUDevice, float>;
template struct functor::ApplyMomentum<GPUDevice, double>; template struct functor::ApplyMomentum<GPUDevice, double>;

View File

@ -559,6 +559,7 @@ REGISTER_OP("ApplyFtrl")
.Output("out: Ref(T)") .Output("out: Ref(T)")
.Attr("T: numbertype") .Attr("T: numbertype")
.Attr("use_locking: bool = false") .Attr("use_locking: bool = false")
.Attr("multiply_linear_by_lr: bool = false")
.SetShapeFn(ApplyFtrlShapeFn</*is_sparse=*/false, /*is_resource=*/false>); .SetShapeFn(ApplyFtrlShapeFn</*is_sparse=*/false, /*is_resource=*/false>);
REGISTER_OP("SparseApplyFtrl") REGISTER_OP("SparseApplyFtrl")
@ -575,6 +576,7 @@ REGISTER_OP("SparseApplyFtrl")
.Attr("T: numbertype") .Attr("T: numbertype")
.Attr("Tindices: {int32, int64}") .Attr("Tindices: {int32, int64}")
.Attr("use_locking: bool = false") .Attr("use_locking: bool = false")
.Attr("multiply_linear_by_lr: bool = false")
.SetShapeFn(ApplyFtrlShapeFn</*is_sparse=*/true, /*is_resource=*/false>); .SetShapeFn(ApplyFtrlShapeFn</*is_sparse=*/true, /*is_resource=*/false>);
REGISTER_OP("ResourceApplyFtrl") REGISTER_OP("ResourceApplyFtrl")
@ -588,6 +590,7 @@ REGISTER_OP("ResourceApplyFtrl")
.Input("lr_power: T") .Input("lr_power: T")
.Attr("T: numbertype") .Attr("T: numbertype")
.Attr("use_locking: bool = false") .Attr("use_locking: bool = false")
.Attr("multiply_linear_by_lr: bool = false")
.SetShapeFn(ApplyFtrlShapeFn</*is_sparse=*/false, /*is_resource=*/true>); .SetShapeFn(ApplyFtrlShapeFn</*is_sparse=*/false, /*is_resource=*/true>);
REGISTER_OP("ResourceSparseApplyFtrl") REGISTER_OP("ResourceSparseApplyFtrl")
@ -603,6 +606,7 @@ REGISTER_OP("ResourceSparseApplyFtrl")
.Attr("T: numbertype") .Attr("T: numbertype")
.Attr("Tindices: {int32, int64}") .Attr("Tindices: {int32, int64}")
.Attr("use_locking: bool = false") .Attr("use_locking: bool = false")
.Attr("multiply_linear_by_lr: bool = false")
.SetShapeFn(ApplyFtrlShapeFn</*is_sparse=*/true, /*is_resource=*/true>); .SetShapeFn(ApplyFtrlShapeFn</*is_sparse=*/true, /*is_resource=*/true>);
REGISTER_OP("ApplyFtrlV2") REGISTER_OP("ApplyFtrlV2")
@ -618,6 +622,7 @@ REGISTER_OP("ApplyFtrlV2")
.Output("out: Ref(T)") .Output("out: Ref(T)")
.Attr("T: numbertype") .Attr("T: numbertype")
.Attr("use_locking: bool = false") .Attr("use_locking: bool = false")
.Attr("multiply_linear_by_lr: bool = false")
.SetShapeFn(ApplyFtrlShapeFn</*is_sparse=*/false, /*is_resource=*/false>); .SetShapeFn(ApplyFtrlShapeFn</*is_sparse=*/false, /*is_resource=*/false>);
REGISTER_OP("SparseApplyFtrlV2") REGISTER_OP("SparseApplyFtrlV2")
@ -635,6 +640,7 @@ REGISTER_OP("SparseApplyFtrlV2")
.Attr("T: numbertype") .Attr("T: numbertype")
.Attr("Tindices: {int32, int64}") .Attr("Tindices: {int32, int64}")
.Attr("use_locking: bool = false") .Attr("use_locking: bool = false")
.Attr("multiply_linear_by_lr: bool = false")
.SetShapeFn(ApplyFtrlShapeFn</*is_sparse=*/true, /*is_resource=*/false>); .SetShapeFn(ApplyFtrlShapeFn</*is_sparse=*/true, /*is_resource=*/false>);
REGISTER_OP("ResourceApplyFtrlV2") REGISTER_OP("ResourceApplyFtrlV2")
@ -649,6 +655,7 @@ REGISTER_OP("ResourceApplyFtrlV2")
.Input("lr_power: T") .Input("lr_power: T")
.Attr("T: numbertype") .Attr("T: numbertype")
.Attr("use_locking: bool = false") .Attr("use_locking: bool = false")
.Attr("multiply_linear_by_lr: bool = false")
.SetShapeFn(ApplyFtrlShapeFn</*is_sparse=*/false, /*is_resource=*/true>); .SetShapeFn(ApplyFtrlShapeFn</*is_sparse=*/false, /*is_resource=*/true>);
REGISTER_OP("ResourceSparseApplyFtrlV2") REGISTER_OP("ResourceSparseApplyFtrlV2")
@ -665,6 +672,7 @@ REGISTER_OP("ResourceSparseApplyFtrlV2")
.Attr("T: numbertype") .Attr("T: numbertype")
.Attr("Tindices: {int32, int64}") .Attr("Tindices: {int32, int64}")
.Attr("use_locking: bool = false") .Attr("use_locking: bool = false")
.Attr("multiply_linear_by_lr: bool = false")
.SetShapeFn(ApplyFtrlShapeFn</*is_sparse=*/true, /*is_resource=*/true>); .SetShapeFn(ApplyFtrlShapeFn</*is_sparse=*/true, /*is_resource=*/true>);
template <bool is_sparse, bool is_resource> template <bool is_sparse, bool is_resource>

View File

@ -129,6 +129,61 @@ class TrainingOpsTest(TensorFlowTestCase):
self.assertAllClose(linear_update, self.evaluate(linear)) self.assertAllClose(linear_update, self.evaluate(linear))
self.assertAllClose(expected_out, out) self.assertAllClose(expected_out, out)
def _testTypesForFtrlMultiplyLinearByLr(self,
                                        x,
                                        y,
                                        z,
                                        lr,
                                        grad,
                                        use_gpu=None,
                                        l1=0.0,
                                        l2=0.0,
                                        lr_power=-0.5):
  """Checks apply_ftrl with multiply_linear_by_lr=True against numpy.

  Runs the ApplyFtrl op with the multiply_linear_by_lr attribute set and
  compares the resulting var/accum/linear values against a numpy
  re-implementation of the lr-scaled FTRL update, with dtype-dependent
  tolerances.

  Args:
    x: initial value for the var variable.
    y: initial value for the accum variable.
    z: initial value for the linear variable.
    lr: learning rate scalar.
    grad: gradient array, same shape as x.
    use_gpu: passed through to self.session().
    l1: L1 regularization strength.
    l2: L2 regularization strength.
    lr_power: learning-rate power (default -0.5, i.e. 1/sqrt(accum)).
  """
  self.setUp()
  with self.session(use_gpu=use_gpu):
    var = variables.VariableV1(x)
    accum = variables.VariableV1(y)
    linear = variables.VariableV1(z)
    self.evaluate(variables.global_variables_initializer())
    self.assertAllCloseAccordingToType(x, self.evaluate(var))
    apply_ftrl = (
        training_ops.apply_ftrl(
            var,
            accum,
            linear,
            grad,
            lr,
            l1,
            l2,
            lr_power,
            multiply_linear_by_lr=True))
    out = self.evaluate(apply_ftrl)
    self.assertShapeEqual(out, apply_ftrl)
    # Numpy reference: linear is updated with grad * lr (not grad / 1), and
    # the l1/l2 thresholds below are scaled by lr to match.
    accum_update = y + grad * grad
    linear_update = z + grad * lr - (accum_update**(-lr_power) - y**
                                     (-lr_power)) * x
    quadratic = accum_update**(-lr_power) + 2 * l2 * lr
    expected_out = np.array([
        (np.sign(linear_update[i]) * l1 * lr - linear_update[i]) /
        (quadratic[i]) if np.abs(linear_update[i]) > l1 * lr else 0.0
        for i in range(linear_update.size)
    ])
    self.assertAllCloseAccordingToType(accum_update, self.evaluate(accum))
    if x.dtype == np.float16:
      # The calculations here really are not very precise in float16.
      self.assertAllClose(
          linear_update, self.evaluate(linear), rtol=2e-2, atol=2e-2)
      self.assertAllClose(expected_out, out, rtol=2e-2, atol=2e-2)
    elif x.dtype == np.float32:
      # The calculations here are not sufficiently precise in float32.
      self.assertAllClose(
          linear_update, self.evaluate(linear), rtol=1e-5, atol=1e-5)
      self.assertAllClose(expected_out, out, rtol=1e-5, atol=1e-5)
    else:
      self.assertAllClose(linear_update, self.evaluate(linear))
      self.assertAllClose(expected_out, out)
@test_util.run_v1_only("b/120545219") @test_util.run_v1_only("b/120545219")
def testApplyAdagrad(self): def testApplyAdagrad(self):
for (dtype, use_gpu) in itertools.product( for (dtype, use_gpu) in itertools.product(
@ -151,6 +206,19 @@ class TrainingOpsTest(TensorFlowTestCase):
grad = np.arange(100).astype(dtype) grad = np.arange(100).astype(dtype)
self._testTypesForFtrl(x, y, z, lr, grad, use_gpu=False, l1=l1, l2=l2) self._testTypesForFtrl(x, y, z, lr, grad, use_gpu=False, l1=l1, l2=l2)
@test_util.run_v1_only("b/120545219")
def testApplyFtrlMultiplyLinearByLr(self):
  """Exercises dense FTRL with multiply_linear_by_lr=True on CPU."""
  for dtype in (np.float16, np.float32, np.float64):
    var_init = np.arange(100).astype(dtype)
    accum_init = np.arange(1, 101).astype(dtype)
    linear_init = np.arange(102, 202).astype(dtype)
    lr, l1, l2 = (np.array(v).astype(dtype) for v in (2.0, 3.0, 4.0))
    grad = np.arange(100).astype(dtype)
    self._testTypesForFtrlMultiplyLinearByLr(
        var_init,
        accum_init,
        linear_init,
        lr,
        grad,
        use_gpu=False,
        l1=l1,
        l2=l2)
def _testTypesForSparseAdagrad(self, x, y, lr, grad, indices): def _testTypesForSparseAdagrad(self, x, y, lr, grad, indices):
self.setUp() self.setUp()
with self.session(use_gpu=False): with self.session(use_gpu=False):
@ -203,6 +271,47 @@ class TrainingOpsTest(TensorFlowTestCase):
out = self.evaluate(sparse_apply_ftrl) out = self.evaluate(sparse_apply_ftrl)
self.assertShapeEqual(out, sparse_apply_ftrl) self.assertShapeEqual(out, sparse_apply_ftrl)
for (i, index) in enumerate(indices):
self.assertAllCloseAccordingToType(
x[index] - lr * grad[i] *
(y[index] + grad[i] * grad[i])**(lr_power),
self.evaluate(var)[index])
self.assertAllCloseAccordingToType(y[index] + grad[i] * grad[i],
self.evaluate(accum)[index])
def _testTypesForSparseFtrlMultiplyLinearByLr(self,
                                              x,
                                              y,
                                              z,
                                              lr,
                                              grad,
                                              indices,
                                              l1=0.0,
                                              l2=0.0,
                                              lr_power=-0.5):
  """Runs sparse_apply_ftrl with multiply_linear_by_lr=True and checks it.

  Applies the sparse FTRL update (with the linear slot scaled by lr) to the
  rows of var/accum/linear selected by `indices` and verifies the op's
  output shape; the per-row numeric assertions follow in this method's
  remainder.

  Args:
    x: initial value for the var variable.
    y: initial value for the accum variable.
    z: initial value for the linear variable.
    lr: learning rate scalar.
    grad: gradients for the selected rows.
    indices: row indices of var/accum/linear to update.
    l1: L1 regularization strength.
    l2: L2 regularization strength.
    lr_power: learning-rate power (default -0.5).
  """
  self.setUp()
  with self.session(use_gpu=False):
    var = variables.VariableV1(x)
    accum = variables.VariableV1(y)
    linear = variables.VariableV1(z)
    self.evaluate(variables.global_variables_initializer())
    self.assertAllCloseAccordingToType(x, self.evaluate(var))
    sparse_apply_ftrl = (
        training_ops.sparse_apply_ftrl(
            var,
            accum,
            linear,
            grad,
            constant_op.constant(indices, self._toType(indices.dtype)),
            lr,
            l1,
            l2,
            lr_power=lr_power,
            multiply_linear_by_lr=True))
    out = self.evaluate(sparse_apply_ftrl)
    self.assertShapeEqual(out, sparse_apply_ftrl)
for (i, index) in enumerate(indices): for (i, index) in enumerate(indices):
self.assertAllCloseAccordingToType( self.assertAllCloseAccordingToType(
x[index] - lr * grad[i] * (y[index] + grad[i] * grad[i])** x[index] - lr * grad[i] * (y[index] + grad[i] * grad[i])**
@ -255,6 +364,23 @@ class TrainingOpsTest(TensorFlowTestCase):
indices = np.array([0, 2]).astype(index_type) indices = np.array([0, 2]).astype(index_type)
self._testTypesForSparseFtrl(x, y, z, lr, grad, indices) self._testTypesForSparseFtrl(x, y, z, lr, grad, indices)
@test_util.run_v1_only("b/120545219")
def testSparseApplyFtrlMultiplyLinearByLrDim1(self):
  """Exercises sparse FTRL (multiply_linear_by_lr=True) on rank-2 slices."""
  float_types = [np.float16, np.float32, np.float64]
  int_types = [np.int32, np.int64]
  for dtype, index_type in itertools.product(float_types, int_types):
    var_init = np.array([[0.0], [0.0], [0.0]]).astype(dtype)
    accum_init = np.array([[4.0], [5.0], [6.0]]).astype(dtype)
    linear_init = np.array([[0.0], [0.0], [0.0]]).astype(dtype)
    lr = np.array(2.0).astype(dtype)
    grad = np.array([[1.5], [2.5]]).astype(dtype)
    indices = np.array([0, 2]).astype(index_type)
    self._testTypesForSparseFtrlMultiplyLinearByLr(var_init, accum_init,
                                                   linear_init, lr, grad,
                                                   indices)
@test_util.run_v1_only("b/120545219") @test_util.run_v1_only("b/120545219")
def testApplyAdam(self): def testApplyAdam(self):
for dtype, use_gpu in itertools.product( for dtype, use_gpu in itertools.product(

View File

@ -150,11 +150,11 @@ tf_module {
} }
member_method { member_method {
name: "ApplyFtrl" name: "ApplyFtrl"
argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'lr\', \'l1\', \'l2\', \'lr_power\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'lr\', \'l1\', \'l2\', \'lr_power\', \'use_locking\', \'multiply_linear_by_lr\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], "
} }
member_method { member_method {
name: "ApplyFtrlV2" name: "ApplyFtrlV2"
argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'lr\', \'l1\', \'l2\', \'l2_shrinkage\', \'lr_power\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'lr\', \'l1\', \'l2\', \'l2_shrinkage\', \'lr_power\', \'use_locking\', \'multiply_linear_by_lr\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], "
} }
member_method { member_method {
name: "ApplyGradientDescent" name: "ApplyGradientDescent"
@ -3414,11 +3414,11 @@ tf_module {
} }
member_method { member_method {
name: "ResourceApplyFtrl" name: "ResourceApplyFtrl"
argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'lr\', \'l1\', \'l2\', \'lr_power\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'lr\', \'l1\', \'l2\', \'lr_power\', \'use_locking\', \'multiply_linear_by_lr\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], "
} }
member_method { member_method {
name: "ResourceApplyFtrlV2" name: "ResourceApplyFtrlV2"
argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'lr\', \'l1\', \'l2\', \'l2_shrinkage\', \'lr_power\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'lr\', \'l1\', \'l2\', \'l2_shrinkage\', \'lr_power\', \'use_locking\', \'multiply_linear_by_lr\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], "
} }
member_method { member_method {
name: "ResourceApplyGradientDescent" name: "ResourceApplyGradientDescent"
@ -3526,11 +3526,11 @@ tf_module {
} }
member_method { member_method {
name: "ResourceSparseApplyFtrl" name: "ResourceSparseApplyFtrl"
argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'indices\', \'lr\', \'l1\', \'l2\', \'lr_power\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'indices\', \'lr\', \'l1\', \'l2\', \'lr_power\', \'use_locking\', \'multiply_linear_by_lr\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], "
} }
member_method { member_method {
name: "ResourceSparseApplyFtrlV2" name: "ResourceSparseApplyFtrlV2"
argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'indices\', \'lr\', \'l1\', \'l2\', \'l2_shrinkage\', \'lr_power\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'indices\', \'lr\', \'l1\', \'l2\', \'l2_shrinkage\', \'lr_power\', \'use_locking\', \'multiply_linear_by_lr\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], "
} }
member_method { member_method {
name: "ResourceSparseApplyKerasMomentum" name: "ResourceSparseApplyKerasMomentum"
@ -4030,11 +4030,11 @@ tf_module {
} }
member_method { member_method {
name: "SparseApplyFtrl" name: "SparseApplyFtrl"
argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'indices\', \'lr\', \'l1\', \'l2\', \'lr_power\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'indices\', \'lr\', \'l1\', \'l2\', \'lr_power\', \'use_locking\', \'multiply_linear_by_lr\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], "
} }
member_method { member_method {
name: "SparseApplyFtrlV2" name: "SparseApplyFtrlV2"
argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'indices\', \'lr\', \'l1\', \'l2\', \'l2_shrinkage\', \'lr_power\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'indices\', \'lr\', \'l1\', \'l2\', \'l2_shrinkage\', \'lr_power\', \'use_locking\', \'multiply_linear_by_lr\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], "
} }
member_method { member_method {
name: "SparseApplyMomentum" name: "SparseApplyMomentum"

View File

@ -150,11 +150,11 @@ tf_module {
} }
member_method { member_method {
name: "ApplyFtrl" name: "ApplyFtrl"
argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'lr\', \'l1\', \'l2\', \'lr_power\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'lr\', \'l1\', \'l2\', \'lr_power\', \'use_locking\', \'multiply_linear_by_lr\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], "
} }
member_method { member_method {
name: "ApplyFtrlV2" name: "ApplyFtrlV2"
argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'lr\', \'l1\', \'l2\', \'l2_shrinkage\', \'lr_power\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'lr\', \'l1\', \'l2\', \'l2_shrinkage\', \'lr_power\', \'use_locking\', \'multiply_linear_by_lr\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], "
} }
member_method { member_method {
name: "ApplyGradientDescent" name: "ApplyGradientDescent"
@ -3414,11 +3414,11 @@ tf_module {
} }
member_method { member_method {
name: "ResourceApplyFtrl" name: "ResourceApplyFtrl"
argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'lr\', \'l1\', \'l2\', \'lr_power\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'lr\', \'l1\', \'l2\', \'lr_power\', \'use_locking\', \'multiply_linear_by_lr\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], "
} }
member_method { member_method {
name: "ResourceApplyFtrlV2" name: "ResourceApplyFtrlV2"
argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'lr\', \'l1\', \'l2\', \'l2_shrinkage\', \'lr_power\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'lr\', \'l1\', \'l2\', \'l2_shrinkage\', \'lr_power\', \'use_locking\', \'multiply_linear_by_lr\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], "
} }
member_method { member_method {
name: "ResourceApplyGradientDescent" name: "ResourceApplyGradientDescent"
@ -3526,11 +3526,11 @@ tf_module {
} }
member_method { member_method {
name: "ResourceSparseApplyFtrl" name: "ResourceSparseApplyFtrl"
argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'indices\', \'lr\', \'l1\', \'l2\', \'lr_power\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'indices\', \'lr\', \'l1\', \'l2\', \'lr_power\', \'use_locking\', \'multiply_linear_by_lr\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], "
} }
member_method { member_method {
name: "ResourceSparseApplyFtrlV2" name: "ResourceSparseApplyFtrlV2"
argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'indices\', \'lr\', \'l1\', \'l2\', \'l2_shrinkage\', \'lr_power\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'indices\', \'lr\', \'l1\', \'l2\', \'l2_shrinkage\', \'lr_power\', \'use_locking\', \'multiply_linear_by_lr\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], "
} }
member_method { member_method {
name: "ResourceSparseApplyKerasMomentum" name: "ResourceSparseApplyKerasMomentum"
@ -4030,11 +4030,11 @@ tf_module {
} }
member_method { member_method {
name: "SparseApplyFtrl" name: "SparseApplyFtrl"
argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'indices\', \'lr\', \'l1\', \'l2\', \'lr_power\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'indices\', \'lr\', \'l1\', \'l2\', \'lr_power\', \'use_locking\', \'multiply_linear_by_lr\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], "
} }
member_method { member_method {
name: "SparseApplyFtrlV2" name: "SparseApplyFtrlV2"
argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'indices\', \'lr\', \'l1\', \'l2\', \'l2_shrinkage\', \'lr_power\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " argspec: "args=[\'var\', \'accum\', \'linear\', \'grad\', \'indices\', \'lr\', \'l1\', \'l2\', \'l2_shrinkage\', \'lr_power\', \'use_locking\', \'multiply_linear_by_lr\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'False\', \'None\'], "
} }
member_method { member_method {
name: "SparseApplyMomentum" name: "SparseApplyMomentum"