Automated g4 rollback of changelist 168576795
PiperOrigin-RevId: 168606478
parent 78f1dd5031
commit f95b1cf115
@@ -416,16 +416,7 @@ tf_gen_op_wrappers_cc(
        "sparse_ops",
        "state_ops",
        "string_ops",
        "adadelta_ops",
        "adagrad_da_ops",
        "adagrad_ops",
        "adam_ops",
        "ftrl_ops",
        "momentum_ops",
        "gradient_descent_ops",
        "proximal_adagrad_ops",
        "proximal_gradient_descent_ops",
        "rms_prop_ops",
        "training_ops",
        "user_ops",
    ],
    other_hdrs = [
@@ -16,34 +16,25 @@ limitations under the License.
#ifndef THIRD_PARTY_TENSORFLOW_CC_OPS_STANDARD_OPS_H_
#define THIRD_PARTY_TENSORFLOW_CC_OPS_STANDARD_OPS_H_

#include "tensorflow/cc/ops/adadelta_ops.h"
#include "tensorflow/cc/ops/adagrad_da_ops.h"
#include "tensorflow/cc/ops/adagrad_ops.h"
#include "tensorflow/cc/ops/adam_ops.h"
#include "tensorflow/cc/ops/array_ops.h"
#include "tensorflow/cc/ops/candidate_sampling_ops.h"
#include "tensorflow/cc/ops/const_op.h"
#include "tensorflow/cc/ops/control_flow_ops.h"
#include "tensorflow/cc/ops/data_flow_ops.h"
#include "tensorflow/cc/ops/ftrl_ops.h"
#include "tensorflow/cc/ops/gradient_descent_ops.h"
#include "tensorflow/cc/ops/image_ops.h"
#include "tensorflow/cc/ops/io_ops.h"
#include "tensorflow/cc/ops/linalg_ops.h"
#include "tensorflow/cc/ops/logging_ops.h"
#include "tensorflow/cc/ops/lookup_ops.h"
#include "tensorflow/cc/ops/math_ops.h"
#include "tensorflow/cc/ops/momentum_ops.h"
#include "tensorflow/cc/ops/nn_ops.h"
#include "tensorflow/cc/ops/no_op.h"
#include "tensorflow/cc/ops/parsing_ops.h"
#include "tensorflow/cc/ops/proximal_adagrad_ops.h"
#include "tensorflow/cc/ops/proximal_gradient_descent_ops.h"
#include "tensorflow/cc/ops/random_ops.h"
#include "tensorflow/cc/ops/rms_prop_ops.h"
#include "tensorflow/cc/ops/sparse_ops.h"
#include "tensorflow/cc/ops/state_ops.h"
#include "tensorflow/cc/ops/string_ops.h"
#include "tensorflow/cc/ops/training_ops.h"
#include "tensorflow/cc/ops/user_ops.h"

#endif // THIRD_PARTY_TENSORFLOW_CC_OPS_STANDARD_OPS_H_
@@ -535,7 +535,6 @@ cc_library(

# Generates library per group of ops.
tf_gen_op_libs(
    extra_srcs = ["ops/training_ops.h"],
    op_lib_names = [
        "bitwise_ops",
        "candidate_sampling_ops",
@@ -568,16 +567,7 @@ tf_gen_op_libs(
        "stateless_random_ops",
        "string_ops",
        "summary_ops",
        "adadelta_ops",
        "adagrad_da_ops",
        "adagrad_ops",
        "adam_ops",
        "ftrl_ops",
        "momentum_ops",
        "gradient_descent_ops",
        "proximal_adagrad_ops",
        "proximal_gradient_descent_ops",
        "rms_prop_ops",
        "training_ops",
    ],
)
@@ -655,16 +645,7 @@ cc_library(
        ":state_ops_op_lib",
        ":stateless_random_ops_op_lib",
        ":string_ops_op_lib",
        ":adadelta_ops_op_lib",
        ":adagrad_da_ops_op_lib",
        ":adagrad_ops_op_lib",
        ":adam_ops_op_lib",
        ":ftrl_ops_op_lib",
        ":momentum_ops_op_lib",
        ":gradient_descent_ops_op_lib",
        ":proximal_adagrad_ops_op_lib",
        ":proximal_gradient_descent_ops_op_lib",
        ":rms_prop_ops_op_lib",
        ":training_ops_op_lib",
        ":user_ops_op_lib",
        ":word2vec_ops",
    ] + tf_additional_cloud_op_deps(),
@@ -3888,18 +3888,9 @@ tf_kernel_library(
        ":bounds_check",
        ":training_op_helpers",
        ":variable_ops",
        "//tensorflow/core:adadelta_ops_op_lib",
        "//tensorflow/core:adagrad_da_ops_op_lib",
        "//tensorflow/core:adagrad_ops_op_lib",
        "//tensorflow/core:adam_ops_op_lib",
        "//tensorflow/core:framework",
        "//tensorflow/core:ftrl_ops_op_lib",
        "//tensorflow/core:gradient_descent_ops_op_lib",
        "//tensorflow/core:lib",
        "//tensorflow/core:momentum_ops_op_lib",
        "//tensorflow/core:proximal_adagrad_ops_op_lib",
        "//tensorflow/core:proximal_gradient_descent_ops_op_lib",
        "//tensorflow/core:rms_prop_ops_op_lib",
        "//tensorflow/core:training_ops_op_lib",
        "//third_party/eigen3",
    ],
)
@@ -1,163 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"

namespace tensorflow {

static Status ApplyAdadeltaShapeFn(InferenceContext* c, bool sparse) {
  ShapeHandle unused;
  ShapeHandle s = ShapeOrHandleShape(c, 0);  // var
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s));  // accum
  TF_RETURN_IF_ERROR(
      c->Merge(s, ShapeOrHandleShape(c, 2), &s));  // accum update
  TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));  // lr
  TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));  // rho
  TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));  // epsilon
  TF_RETURN_IF_ERROR(
      HandleGradAndIndicesInputs(c, sparse, 6 /* grad_idx */, &s));
  if (c->num_outputs() > 0) {
    c->set_output(0, s);
  }
  return Status::OK();
}

REGISTER_OP("ApplyAdadelta")
    .Input("var: Ref(T)")
    .Input("accum: Ref(T)")
    .Input("accum_update: Ref(T)")
    .Input("lr: T")
    .Input("rho: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdadeltaShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the adadelta scheme.

accum = rho() * accum + (1 - rho()) * grad.square();
update = (update_accum + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad;
update_accum = rho() * update_accum + (1 - rho()) * update.square();
var -= update;

var: Should be from a Variable().
accum: Should be from a Variable().
accum_update: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
rho: Decay factor. Must be a scalar.
epsilon: Constant factor. Must be a scalar.
grad: The gradient.
out: Same as "var".
use_locking: If True, updating of the var, accum and update_accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");

REGISTER_OP("SparseApplyAdadelta")
    .Input("var: Ref(T)")
    .Input("accum: Ref(T)")
    .Input("accum_update: Ref(T)")
    .Input("lr: T")
    .Input("rho: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdadeltaShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
var: Should be from a Variable().
accum: Should be from a Variable().
accum_update: Should be from a Variable().
lr: Learning rate. Must be a scalar.
rho: Decay factor. Must be a scalar.
epsilon: Constant factor. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
out: Same as "var".
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");

REGISTER_OP("ResourceApplyAdadelta")
    .Input("var: resource")
    .Input("accum: resource")
    .Input("accum_update: resource")
    .Input("lr: T")
    .Input("rho: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdadeltaShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the adadelta scheme.

accum = rho() * accum + (1 - rho()) * grad.square();
update = (update_accum + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad;
update_accum = rho() * update_accum + (1 - rho()) * update.square();
var -= update;

var: Should be from a Variable().
accum: Should be from a Variable().
accum_update: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
rho: Decay factor. Must be a scalar.
epsilon: Constant factor. Must be a scalar.
grad: The gradient.
use_locking: If True, updating of the var, accum and update_accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");

REGISTER_OP("ResourceSparseApplyAdadelta")
    .Input("var: resource")
    .Input("accum: resource")
    .Input("accum_update: resource")
    .Input("lr: T")
    .Input("rho: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdadeltaShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
var: Should be from a Variable().
accum: Should be from a Variable().
accum_update: Should be from a Variable().
lr: Learning rate. Must be a scalar.
rho: Decay factor. Must be a scalar.
epsilon: Constant factor. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");

} // namespace tensorflow
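Aside (editorial note, not part of the rollback diff): the Adadelta doc string above can be read as the following minimal, self-contained C++ sketch of one update step for a single scalar parameter. The function name and the scalar simplification are mine; the real kernels operate on whole tensors and live elsewhere in the tree.

#include <cmath>

// Scalar sketch of the quoted equations:
//   accum        = rho * accum + (1 - rho) * grad^2
//   update       = sqrt(update_accum + epsilon) * rsqrt(accum + epsilon) * grad
//   update_accum = rho * update_accum + (1 - rho) * update^2
//   var         -= update
void AdadeltaStep(float* var, float* accum, float* update_accum, float rho,
                  float epsilon, float grad) {
  *accum = rho * *accum + (1.0f - rho) * grad * grad;
  float update =
      std::sqrt(*update_accum + epsilon) / std::sqrt(*accum + epsilon) * grad;
  *update_accum = rho * *update_accum + (1.0f - rho) * update * update;
  *var -= update;
}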
@@ -1,168 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"

namespace tensorflow {

static Status ApplyAdagradDAShapeFn(InferenceContext* c, bool sparse) {
  ShapeHandle unused;
  ShapeHandle s = ShapeOrHandleShape(c, 0);  // var
  TF_RETURN_IF_ERROR(
      c->Merge(s, ShapeOrHandleShape(c, 1), &s));  // grad_accumulator
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 2),
                              &s));  // gradient_squared_accumulator
  TF_RETURN_IF_ERROR(
      HandleGradAndIndicesInputs(c, sparse, 3 /* grad_idx */, &s));
  int idx = sparse ? 5 : 4;
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // lr
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // l1
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // l2
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // global step
  if (c->num_outputs() > 0) {
    c->set_output(0, s);
  }
  return Status::OK();
}

REGISTER_OP("ApplyAdagradDA")
    .Input("var: Ref(T)")
    .Input("gradient_accumulator: Ref(T)")
    .Input("gradient_squared_accumulator: Ref(T)")
    .Input("grad: T")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("global_step: int64")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdagradDAShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the proximal adagrad scheme.

var: Should be from a Variable().
gradient_accumulator: Should be from a Variable().
gradient_squared_accumulator: Should be from a Variable().
grad: The gradient.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
global_step: Training step number. Must be a scalar.
out: Same as "var".
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");

REGISTER_OP("SparseApplyAdagradDA")
    .Input("var: Ref(T)")
    .Input("gradient_accumulator: Ref(T)")
    .Input("gradient_squared_accumulator: Ref(T)")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("global_step: int64")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdagradDAShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update entries in '*var' and '*accum' according to the proximal adagrad scheme.

var: Should be from a Variable().
gradient_accumulator: Should be from a Variable().
gradient_squared_accumulator: Should be from a Variable().
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
lr: Learning rate. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
global_step: Training step number. Must be a scalar.
out: Same as "var".
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");

REGISTER_OP("ResourceApplyAdagradDA")
    .Input("var: resource")
    .Input("gradient_accumulator: resource")
    .Input("gradient_squared_accumulator: resource")
    .Input("grad: T")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("global_step: int64")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdagradDAShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the proximal adagrad scheme.

var: Should be from a Variable().
gradient_accumulator: Should be from a Variable().
gradient_squared_accumulator: Should be from a Variable().
grad: The gradient.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
global_step: Training step number. Must be a scalar.
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");

REGISTER_OP("ResourceSparseApplyAdagradDA")
    .Input("var: resource")
    .Input("gradient_accumulator: resource")
    .Input("gradient_squared_accumulator: resource")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("global_step: int64")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdagradDAShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update entries in '*var' and '*accum' according to the proximal adagrad scheme.

var: Should be from a Variable().
gradient_accumulator: Should be from a Variable().
gradient_squared_accumulator: Should be from a Variable().
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
lr: Learning rate. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
global_step: Training step number. Must be a scalar.
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");

} // namespace tensorflow
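Aside (editorial note, not part of the diff): ApplyAdagradDAShapeFn above is shared by the dense and sparse ops, and the `int idx = sparse ? 5 : 4;` line simply encodes where the scalar inputs begin once the optional `indices` input is inserted after `grad`. Spelled out from the REGISTER_OP input lists above (the constant names below are my own):

// Dense ApplyAdagradDA inputs:  0 var, 1 gradient_accumulator,
//   2 gradient_squared_accumulator, 3 grad, 4 lr, 5 l1, 6 l2, 7 global_step.
// SparseApplyAdagradDA inputs:  0..3 as above, 4 indices, then
//   5 lr, 6 l1, 7 l2, 8 global_step.
constexpr int kDenseScalarStart = 4;   // matches `sparse ? 5 : 4` when sparse == false
constexpr int kSparseScalarStart = 5;  // matches it when sparse == true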
@@ -1,147 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"

namespace tensorflow {

static Status ApplyAdagradShapeFn(InferenceContext* c, bool sparse) {
  ShapeHandle unused;
  ShapeHandle s = ShapeOrHandleShape(c, 0);  // var
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s));  // accum
  TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));  // lr
  TF_RETURN_IF_ERROR(
      HandleGradAndIndicesInputs(c, sparse, 3 /* grad_idx */, &s));
  if (c->num_outputs() > 0) {
    c->set_output(0, s);
  }
  return Status::OK();
}

REGISTER_OP("ApplyAdagrad")
    .Input("var: Ref(T)")
    .Input("accum: Ref(T)")
    .Input("lr: T")
    .Input("grad: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdagradShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the adagrad scheme.

accum += grad * grad
var -= lr * grad * (1 / sqrt(accum))

var: Should be from a Variable().
accum: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
grad: The gradient.
out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("ResourceApplyAdagrad")
    .Input("var: resource")
    .Input("accum: resource")
    .Input("lr: T")
    .Input("grad: T")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdagradShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the adagrad scheme.

accum += grad * grad
var -= lr * grad * (1 / sqrt(accum))

var: Should be from a Variable().
accum: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
grad: The gradient.
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("SparseApplyAdagrad")
    .Input("var: Ref(T)")
    .Input("accum: Ref(T)")
    .Input("lr: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdagradShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update relevant entries in '*var' and '*accum' according to the adagrad scheme.

That is for rows we have grad for, we update var and accum as follows:
accum += grad * grad
var -= lr * grad * (1 / sqrt(accum))

var: Should be from a Variable().
accum: Should be from a Variable().
lr: Learning rate. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("ResourceSparseApplyAdagrad")
    .Input("var: resource")
    .Input("accum: resource")
    .Input("lr: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdagradShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update relevant entries in '*var' and '*accum' according to the adagrad scheme.

That is for rows we have grad for, we update var and accum as follows:
accum += grad * grad
var -= lr * grad * (1 / sqrt(accum))

var: Should be from a Variable().
accum: Should be from a Variable().
lr: Learning rate. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

} // namespace tensorflow
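Aside (editorial note, not part of the diff): the Adagrad doc strings above reduce to this small scalar C++ sketch; the naming is mine.

#include <cmath>

// Scalar sketch of the quoted rule:
//   accum += grad^2
//   var   -= lr * grad * (1 / sqrt(accum))
void AdagradStep(float* var, float* accum, float lr, float grad) {
  *accum += grad * grad;
  *var -= lr * grad * (1.0f / std::sqrt(*accum));
}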
@@ -1,125 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"

namespace tensorflow {

static Status ApplyAdamShapeFn(InferenceContext* c, bool sparse) {
  ShapeHandle unused;
  ShapeHandle s = ShapeOrHandleShape(c, 0);  // var
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s));  // m
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 2), &s));  // v
  TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));  // beta1_power
  TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));  // beta2_power
  TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));  // lr
  TF_RETURN_IF_ERROR(c->WithRank(c->input(6), 0, &unused));  // beta1
  TF_RETURN_IF_ERROR(c->WithRank(c->input(7), 0, &unused));  // beta2
  TF_RETURN_IF_ERROR(c->WithRank(c->input(8), 0, &unused));  // epsilon
  TF_RETURN_IF_ERROR(
      HandleGradAndIndicesInputs(c, sparse, 9 /* grad_idx */, &s));
  if (c->num_outputs() > 0) {
    c->set_output(0, s);
  }
  return Status::OK();
}

REGISTER_OP("ApplyAdam")
    .Input("var: Ref(T)")
    .Input("m: Ref(T)")
    .Input("v: Ref(T)")
    .Input("beta1_power: T")
    .Input("beta2_power: T")
    .Input("lr: T")
    .Input("beta1: T")
    .Input("beta2: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .Attr("use_nesterov: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdamShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the Adam algorithm.

lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)
m_t <- beta1 * m_{t-1} + (1 - beta1) * g_t
v_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t
variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)

var: Should be from a Variable().
m: Should be from a Variable().
v: Should be from a Variable().
beta1_power: Must be a scalar.
beta2_power: Must be a scalar.
lr: Scaling factor. Must be a scalar.
beta1: Momentum factor. Must be a scalar.
beta2: Momentum factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
grad: The gradient.
out: Same as "var".
use_locking: If `True`, updating of the var, m, and v tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
use_nesterov: If `True`, uses the nesterov update.
)doc");

REGISTER_OP("ResourceApplyAdam")
    .Input("var: resource")
    .Input("m: resource")
    .Input("v: resource")
    .Input("beta1_power: T")
    .Input("beta2_power: T")
    .Input("lr: T")
    .Input("beta1: T")
    .Input("beta2: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .Attr("use_nesterov: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdamShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the Adam algorithm.

lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)
m_t <- beta1 * m_{t-1} + (1 - beta1) * g_t
v_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t
variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)

var: Should be from a Variable().
m: Should be from a Variable().
v: Should be from a Variable().
beta1_power: Must be a scalar.
beta2_power: Must be a scalar.
lr: Scaling factor. Must be a scalar.
beta1: Momentum factor. Must be a scalar.
beta2: Momentum factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
grad: The gradient.
use_locking: If `True`, updating of the var, m, and v tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
use_nesterov: If `True`, uses the nesterov update.
)doc");

} // namespace tensorflow
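Aside (editorial note, not part of the diff): a scalar C++ sketch of the Adam update quoted in the doc strings above; beta1_power and beta2_power are the beta1^t and beta2^t values the caller feeds in, and the naming is mine.

#include <cmath>

// lr_t  = lr * sqrt(1 - beta2^t) / (1 - beta1^t)
// m     = beta1 * m + (1 - beta1) * g
// v     = beta2 * v + (1 - beta2) * g^2
// var  -= lr_t * m / (sqrt(v) + epsilon)
void AdamStep(float* var, float* m, float* v, float lr, float beta1,
              float beta2, float beta1_power, float beta2_power, float epsilon,
              float g) {
  float lr_t = lr * std::sqrt(1.0f - beta2_power) / (1.0f - beta1_power);
  *m = beta1 * *m + (1.0f - beta1) * g;
  *v = beta2 * *v + (1.0f - beta2) * g * g;
  *var -= lr_t * *m / (std::sqrt(*v) + epsilon);
}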
@@ -1,368 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"

namespace tensorflow {

static Status ApplyFtrlShapeFn(InferenceContext* c, bool sparse) {
  ShapeHandle unused;
  ShapeHandle s = ShapeOrHandleShape(c, 0);  // var
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s));  // accum
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 2), &s));  // linear
  TF_RETURN_IF_ERROR(
      HandleGradAndIndicesInputs(c, sparse, 3 /* grad_idx */, &s));
  int idx = sparse ? 5 : 4;
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // lr
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // l1
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // l2
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // lr_power
  if (c->num_outputs() > 0) {
    c->set_output(0, s);
  }
  return Status::OK();
}

REGISTER_OP("ApplyFtrl")
    .Input("var: Ref(T)")
    .Input("accum: Ref(T)")
    .Input("linear: Ref(T)")
    .Input("grad: T")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("lr_power: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyFtrlShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the Ftrl-proximal scheme.

accum_new = accum + grad * grad
linear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
accum = accum_new

var: Should be from a Variable().
accum: Should be from a Variable().
linear: Should be from a Variable().
grad: The gradient.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
lr_power: Scaling factor. Must be a scalar.
out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("SparseApplyFtrl")
    .Input("var: Ref(T)")
    .Input("accum: Ref(T)")
    .Input("linear: Ref(T)")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("lr_power: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyFtrlShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update relevant entries in '*var' according to the Ftrl-proximal scheme.

That is for rows we have grad for, we update var, accum and linear as follows:
accum_new = accum + grad * grad
linear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
accum = accum_new

var: Should be from a Variable().
accum: Should be from a Variable().
linear: Should be from a Variable().
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
lr_power: Scaling factor. Must be a scalar.
out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("ResourceApplyFtrl")
    .Input("var: resource")
    .Input("accum: resource")
    .Input("linear: resource")
    .Input("grad: T")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("lr_power: T")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyFtrlShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the Ftrl-proximal scheme.

accum_new = accum + grad * grad
linear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
accum = accum_new

var: Should be from a Variable().
accum: Should be from a Variable().
linear: Should be from a Variable().
grad: The gradient.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
lr_power: Scaling factor. Must be a scalar.
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("ResourceSparseApplyFtrl")
    .Input("var: resource")
    .Input("accum: resource")
    .Input("linear: resource")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("lr_power: T")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyFtrlShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update relevant entries in '*var' according to the Ftrl-proximal scheme.

That is for rows we have grad for, we update var, accum and linear as follows:
accum_new = accum + grad * grad
linear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
accum = accum_new

var: Should be from a Variable().
accum: Should be from a Variable().
linear: Should be from a Variable().
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
lr_power: Scaling factor. Must be a scalar.
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("ApplyFtrlV2")
    .Input("var: Ref(T)")
    .Input("accum: Ref(T)")
    .Input("linear: Ref(T)")
    .Input("grad: T")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("l2_shrinkage: T")
    .Input("lr_power: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyFtrlShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the Ftrl-proximal scheme.

grad_with_shrinkage = grad + 2 * l2_shrinkage * var
accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
linear += grad_with_shrinkage +
    (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
accum = accum_new

var: Should be from a Variable().
accum: Should be from a Variable().
linear: Should be from a Variable().
grad: The gradient.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: online L2 regularization. Must be a scalar.
l2_shrinkage: L2 shrinkage regularization. Must be a scalar.
lr_power: Scaling factor. Must be a scalar.
out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("SparseApplyFtrlV2")
    .Input("var: Ref(T)")
    .Input("accum: Ref(T)")
    .Input("linear: Ref(T)")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("l2_shrinkage: T")
    .Input("lr_power: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyFtrlShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update relevant entries in '*var' according to the Ftrl-proximal scheme.

That is for rows we have grad for, we update var, accum and linear as follows:
grad_with_shrinkage = grad + 2 * l2_shrinkage * var
accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
linear += grad_with_shrinkage +
    (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
accum = accum_new

var: Should be from a Variable().
accum: Should be from a Variable().
linear: Should be from a Variable().
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: online L2 regularization. Must be a scalar.
l2_shrinkage: L2 shrinkage regularization. Must be a scalar.
lr_power: Scaling factor. Must be a scalar.
out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("ResourceApplyFtrlV2")
    .Input("var: resource")
    .Input("accum: resource")
    .Input("linear: resource")
    .Input("grad: T")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("l2_shrinkage: T")
    .Input("lr_power: T")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyFtrlShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the Ftrl-proximal scheme.

grad_with_shrinkage = grad + 2 * l2_shrinkage * var
accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
linear += grad_with_shrinkage +
    (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
accum = accum_new

var: Should be from a Variable().
accum: Should be from a Variable().
linear: Should be from a Variable().
grad: The gradient.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: online L2 regularization. Must be a scalar.
l2_shrinkage: L2 shrinkage regularization. Must be a scalar.
lr_power: Scaling factor. Must be a scalar.
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("ResourceSparseApplyFtrlV2")
    .Input("var: resource")
    .Input("accum: resource")
    .Input("linear: resource")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("l2_shrinkage: T")
    .Input("lr_power: T")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyFtrlShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update relevant entries in '*var' according to the Ftrl-proximal scheme.

That is for rows we have grad for, we update var, accum and linear as follows:
grad_with_shrinkage = grad + 2 * l2_shrinkage * var
accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
linear += grad_with_shrinkage +
    (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
accum = accum_new

var: Should be from a Variable().
accum: Should be from a Variable().
linear: Should be from a Variable().
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: online L2 regularization. Must be a scalar.
l2_shrinkage: L2 shrinkage regularization. Must be a scalar.
lr_power: Scaling factor. Must be a scalar.
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

} // namespace tensorflow
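Aside (editorial note, not part of the diff): a scalar C++ sketch of the plain Ftrl-proximal update described above, without the V2 l2_shrinkage term. The doc strings above disagree on the sign in front of the (accum_new^(-lr_power) - accum^(-lr_power)) term; this sketch follows the '-' form used in the ResourceApplyFtrl doc, and the naming (including `sigma`) is mine.

#include <cmath>

void FtrlStep(float* var, float* accum, float* linear, float grad, float lr,
              float l1, float l2, float lr_power) {
  float accum_new = *accum + grad * grad;
  // sigma = (accum_new^(-lr_power) - accum^(-lr_power)) / lr
  float sigma =
      (std::pow(accum_new, -lr_power) - std::pow(*accum, -lr_power)) / lr;
  *linear += grad - sigma * *var;
  // quadratic = 1 / (accum_new^(lr_power) * lr) + 2 * l2
  float quadratic = 1.0f / (std::pow(accum_new, lr_power) * lr) + 2.0f * l2;
  // var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0
  float sign = (*linear >= 0.0f) ? 1.0f : -1.0f;
  *var = (std::fabs(*linear) > l1) ? (sign * l1 - *linear) / quadratic : 0.0f;
  *accum = accum_new;
}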
@@ -1,69 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"

namespace tensorflow {

static Status ApplyGradientDescentShapeFn(InferenceContext* c) {
  ShapeHandle unused;
  ShapeHandle s = ShapeOrHandleShape(c, 0);  // var
  TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));  // alpha
  TF_RETURN_IF_ERROR(c->Merge(s, c->input(2), &s));  // delta
  if (c->num_outputs() > 0) {
    c->set_output(0, s);
  }
  return Status::OK();
}

REGISTER_OP("ApplyGradientDescent")
    .Input("var: Ref(T)")
    .Input("alpha: T")
    .Input("delta: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn(ApplyGradientDescentShapeFn)
    .Doc(R"doc(
Update '*var' by subtracting 'alpha' * 'delta' from it.

var: Should be from a Variable().
alpha: Scaling factor. Must be a scalar.
delta: The change.
out: Same as "var".
use_locking: If `True`, the subtraction will be protected by a lock;
otherwise the behavior is undefined, but may exhibit less contention.
)doc");

REGISTER_OP("ResourceApplyGradientDescent")
    .Input("var: resource")
    .Input("alpha: T")
    .Input("delta: T")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn(ApplyGradientDescentShapeFn)
    .Doc(R"doc(
Update '*var' by subtracting 'alpha' * 'delta' from it.

var: Should be from a Variable().
alpha: Scaling factor. Must be a scalar.
delta: The change.
use_locking: If `True`, the subtraction will be protected by a lock;
otherwise the behavior is undefined, but may exhibit less contention.
)doc");

} // namespace tensorflow
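Aside (editorial note, not part of the diff): the gradient descent ops above implement the one-line rule var -= alpha * delta; as a scalar C++ sketch (naming mine):

void GradientDescentStep(float* var, float alpha, float delta) {
  *var -= alpha * delta;  // Update '*var' by subtracting 'alpha' * 'delta'.
}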
@@ -1,179 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"

namespace tensorflow {

static Status ApplyMomentumShapeFn(InferenceContext* c, bool sparse) {
  ShapeHandle unused;
  ShapeHandle s = ShapeOrHandleShape(c, 0);  // var
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s));  // accum
  TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));  // lr
  TF_RETURN_IF_ERROR(
      HandleGradAndIndicesInputs(c, sparse, 3 /* grad_idx */, &s));
  int idx = sparse ? 5 : 4;
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // momentum
  if (c->num_outputs() > 0) {
    c->set_output(0, s);
  }
  return Status::OK();
}

REGISTER_OP("ApplyMomentum")
    .Input("var: Ref(T)")
    .Input("accum: Ref(T)")
    .Input("lr: T")
    .Input("grad: T")
    .Input("momentum: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .Attr("use_nesterov: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyMomentumShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the momentum scheme. Set use_nesterov = True if you
want to use Nesterov momentum.

accum = accum * momentum + grad
var -= lr * accum

var: Should be from a Variable().
accum: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
grad: The gradient.
momentum: Momentum. Must be a scalar.
out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
use_nesterov: If `True`, the tensor passed to compute grad will be
var - lr * momentum * accum, so in the end, the var you get is actually
var - lr * momentum * accum.
)doc");

REGISTER_OP("SparseApplyMomentum")
    .Input("var: Ref(T)")
    .Input("accum: Ref(T)")
    .Input("lr: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Input("momentum: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .Attr("use_nesterov: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyMomentumShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update relevant entries in '*var' and '*accum' according to the momentum scheme.
Set use_nesterov = True if you want to use Nesterov momentum.

That is for rows we have grad for, we update var and accum as follows:

accum = accum * momentum + grad
var -= lr * accum

var: Should be from a Variable().
accum: Should be from a Variable().
lr: Learning rate. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
momentum: Momentum. Must be a scalar.
out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
use_nesterov: If `True`, the tensor passed to compute grad will be
var - lr * momentum * accum, so in the end, the var you get is actually
var - lr * momentum * accum.
)doc");

REGISTER_OP("ResourceApplyMomentum")
    .Input("var: resource")
    .Input("accum: resource")
    .Input("lr: T")
    .Input("grad: T")
    .Input("momentum: T")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .Attr("use_nesterov: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyMomentumShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the momentum scheme. Set use_nesterov = True if you
want to use Nesterov momentum.

accum = accum * momentum + grad
var -= lr * accum

var: Should be from a Variable().
accum: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
grad: The gradient.
momentum: Momentum. Must be a scalar.
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
use_nesterov: If `True`, the tensor passed to compute grad will be
var - lr * momentum * accum, so in the end, the var you get is actually
var - lr * momentum * accum.
)doc");

REGISTER_OP("ResourceSparseApplyMomentum")
    .Input("var: resource")
    .Input("accum: resource")
    .Input("lr: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Input("momentum: T")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .Attr("use_nesterov: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyMomentumShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update relevant entries in '*var' and '*accum' according to the momentum scheme.
Set use_nesterov = True if you want to use Nesterov momentum.

That is for rows we have grad for, we update var and accum as follows:

accum = accum * momentum + grad
var -= lr * accum

var: Should be from a Variable().
accum: Should be from a Variable().
lr: Learning rate. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
momentum: Momentum. Must be a scalar.
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
use_nesterov: If `True`, the tensor passed to compute grad will be
var - lr * momentum * accum, so in the end, the var you get is actually
var - lr * momentum * accum.
)doc");

} // namespace tensorflow
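Aside (editorial note, not part of the diff): a scalar C++ sketch of the momentum rule quoted above, covering only the use_nesterov = false case described by the equations; the naming is mine.

void MomentumStep(float* var, float* accum, float lr, float grad,
                  float momentum) {
  *accum = *accum * momentum + grad;  // accum = accum * momentum + grad
  *var -= lr * *accum;                // var  -= lr * accum
}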
@ -1,183 +0,0 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/core/framework/op.h"
|
||||
#include "tensorflow/core/framework/shape_inference.h"
|
||||
#include "tensorflow/core/ops/training_ops.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
static Status ApplyProximalAdagradShapeFn(InferenceContext* c, bool sparse) {
|
||||
ShapeHandle unused;
|
||||
ShapeHandle s = ShapeOrHandleShape(c, 0); // var
|
||||
TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s)); // accum
|
||||
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); // lr
|
||||
TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); // l1
|
||||
TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); // l2
|
||||
TF_RETURN_IF_ERROR(
|
||||
HandleGradAndIndicesInputs(c, sparse, 5 /* grad_idx */, &s));
|
||||
if (c->num_outputs() > 0) {
|
||||
c->set_output(0, s);
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
REGISTER_OP("ApplyProximalAdagrad")
|
||||
.Input("var: Ref(T)")
|
||||
.Input("accum: Ref(T)")
|
||||
.Input("lr: T")
|
||||
.Input("l1: T")
|
||||
.Input("l2: T")
|
||||
.Input("grad: T")
|
||||
.Output("out: Ref(T)")
|
||||
.Attr("T: numbertype")
|
||||
.Attr("use_locking: bool = false")
|
||||
.SetShapeFn([](InferenceContext* c) {
|
||||
return ApplyProximalAdagradShapeFn(c, false /* sparse */);
|
||||
})
|
||||
.Doc(R"doc(
|
||||
Update '*var' and '*accum' according to FOBOS with Adagrad learning rate.
|
||||
accum += grad * grad
|
||||
prox_v = var - lr * grad * (1 / sqrt(accum))
|
||||
var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}
|
||||
|
||||
var: Should be from a Variable().
|
||||
accum: Should be from a Variable().
|
||||
grad: The gradient.
|
||||
lr: Scaling factor. Must be a scalar.
|
||||
l1: L1 regularization. Must be a scalar.
|
||||
l2: L2 regularization. Must be a scalar.
|
||||
out: Same as "var".
|
||||
use_locking: If True, updating of the var and accum tensors will be protected by
|
||||
a lock; otherwise the behavior is undefined, but may exhibit less contention.
|
||||
)doc");
|

REGISTER_OP("ResourceApplyProximalAdagrad")
    .Input("var: resource")
    .Input("accum: resource")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("grad: T")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyProximalAdagradShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' and '*accum' according to FOBOS with Adagrad learning rate.
accum += grad * grad
prox_v = var - lr * grad * (1 / sqrt(accum))
var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}

var: Should be from a Variable().
accum: Should be from a Variable().
grad: The gradient.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");

REGISTER_OP("SparseApplyProximalAdagrad")
    .Input("var: Ref(T)")
    .Input("accum: Ref(T)")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyProximalAdagradShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Sparse update entries in '*var' and '*accum' according to FOBOS algorithm.

That is for rows we have grad for, we update var and accum as follows:
accum += grad * grad
prox_v = var
prox_v -= lr * grad * (1 / sqrt(accum))
var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}

var: Should be from a Variable().
accum: Should be from a Variable().
lr: Learning rate. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
out: Same as "var".
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");

REGISTER_OP("ResourceSparseApplyProximalAdagrad")
    .Input("var: resource")
    .Input("accum: resource")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyProximalAdagradShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Sparse update entries in '*var' and '*accum' according to FOBOS algorithm.

That is for rows we have grad for, we update var and accum as follows:
accum += grad * grad
prox_v = var
prox_v -= lr * grad * (1 / sqrt(accum))
var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}

var: Should be from a Variable().
accum: Should be from a Variable().
lr: Learning rate. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");

static Status ApplyFtrlShapeFn(InferenceContext* c, bool sparse) {
  ShapeHandle unused;
  ShapeHandle s = ShapeOrHandleShape(c, 0);  // var
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s));  // accum
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 2), &s));  // linear
  TF_RETURN_IF_ERROR(
      HandleGradAndIndicesInputs(c, sparse, 3 /* grad_idx */, &s));
  int idx = sparse ? 5 : 4;
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // lr
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // l1
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // l2
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // lr_power
  if (c->num_outputs() > 0) {
    c->set_output(0, s);
  }
  return Status::OK();
}

} // namespace tensorflow

@ -1,151 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"

namespace tensorflow {

static Status ApplyProximalGradientDescentShapeFn(InferenceContext* c,
                                                  bool sparse) {
  ShapeHandle unused;
  ShapeHandle s = ShapeOrHandleShape(c, 0);  // var
  TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));  // alpha
  TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));  // l1
  TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));  // l2
  TF_RETURN_IF_ERROR(
      HandleGradAndIndicesInputs(c, sparse, 4 /* grad_idx */, &s));
  if (c->num_outputs() > 0) {
    c->set_output(0, s);
  }
  return Status::OK();
}

REGISTER_OP("ApplyProximalGradientDescent")
    .Input("var: Ref(T)")
    .Input("alpha: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("delta: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyProximalGradientDescentShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' as FOBOS algorithm with fixed learning rate.
prox_v = var - alpha * delta
var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}

var: Should be from a Variable().
alpha: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
delta: The change.
out: Same as "var".
use_locking: If True, the subtraction will be protected by a lock;
otherwise the behavior is undefined, but may exhibit less contention.
)doc");
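
For a concrete feel of the soft-thresholding step in this update, here is a tiny worked example with illustrative values only: alpha * l1 = 0.02 is subtracted from |prox_v| before the sign is restored.

import numpy as np

# Scalar example of the documented update (illustrative values only).
var, delta, alpha, l1, l2 = 1.0, 0.5, 0.1, 0.2, 0.0
prox_v = var - alpha * delta                                   # 0.95
var = np.sign(prox_v) / (1 + alpha * l2) * max(abs(prox_v) - alpha * l1, 0.0)
print(var)                                                     # -> 0.93 (up to float rounding)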

REGISTER_OP("SparseApplyProximalGradientDescent")
    .Input("var: Ref(T)")
    .Input("alpha: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyProximalGradientDescentShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Sparse update '*var' as FOBOS algorithm with fixed learning rate.

That is for rows we have grad for, we update var as follows:
prox_v = var - alpha * grad
var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}

var: Should be from a Variable().
alpha: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
out: Same as "var".
use_locking: If True, the subtraction will be protected by a lock;
otherwise the behavior is undefined, but may exhibit less contention.
)doc");

REGISTER_OP("ResourceApplyProximalGradientDescent")
    .Input("var: resource")
    .Input("alpha: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("delta: T")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyProximalGradientDescentShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' as FOBOS algorithm with fixed learning rate.
prox_v = var - alpha * delta
var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}

var: Should be from a Variable().
alpha: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
delta: The change.
use_locking: If True, the subtraction will be protected by a lock;
otherwise the behavior is undefined, but may exhibit less contention.
)doc");

REGISTER_OP("ResourceSparseApplyProximalGradientDescent")
    .Input("var: resource")
    .Input("alpha: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyProximalGradientDescentShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Sparse update '*var' as FOBOS algorithm with fixed learning rate.

That is for rows we have grad for, we update var as follows:
prox_v = var - alpha * grad
var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}

var: Should be from a Variable().
alpha: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
use_locking: If True, the subtraction will be protected by a lock;
otherwise the behavior is undefined, but may exhibit less contention.
)doc");

} // namespace tensorflow

@ -1,425 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"

namespace tensorflow {

static Status ApplyRMSPropShapeFn(InferenceContext* c, bool sparse) {
  ShapeHandle unused;
  ShapeHandle s = ShapeOrHandleShape(c, 0);  // var
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s));  // ms
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 2), &s));  // mom
  TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));  // lr
  TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));  // rho
  TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));  // momentum
  TF_RETURN_IF_ERROR(c->WithRank(c->input(6), 0, &unused));  // epsilon
  TF_RETURN_IF_ERROR(
      HandleGradAndIndicesInputs(c, sparse, 7 /* grad_idx */, &s));
  if (c->num_outputs() > 0) {
    c->set_output(0, s);
  }
  return Status::OK();
}

static Status ApplyCenteredRMSPropShapeFn(InferenceContext* c, bool sparse) {
  ShapeHandle unused;
  ShapeHandle s = ShapeOrHandleShape(c, 0);  // var
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s));  // ms
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 2), &s));  // mg
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 3), &s));  // mom
  TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));  // lr
  TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));  // rho
  TF_RETURN_IF_ERROR(c->WithRank(c->input(6), 0, &unused));  // momentum
  TF_RETURN_IF_ERROR(c->WithRank(c->input(7), 0, &unused));  // epsilon
  TF_RETURN_IF_ERROR(
      HandleGradAndIndicesInputs(c, sparse, 8 /* grad_idx */, &s));
  if (c->num_outputs() > 0) {
    c->set_output(0, s);
  }
  return Status::OK();
}

REGISTER_OP("ApplyRMSProp")
    .Input("var: Ref(T)")
    .Input("ms: Ref(T)")
    .Input("mom: Ref(T)")
    .Input("lr: T")
    .Input("rho: T")
    .Input("momentum: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyRMSPropShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the RMSProp algorithm.
Note that in dense implementation of this algorithm, ms and mom will
update even if the grad is zero, but in this sparse implementation, ms
and mom will not update in iterations during which the grad is zero.

mean_square = decay * mean_square + (1-decay) * gradient ** 2
Delta = learning_rate * gradient / sqrt(mean_square + epsilon)

ms <- rho * ms_{t-1} + (1-rho) * grad * grad
mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
var <- var - mom

var: Should be from a Variable().
ms: Should be from a Variable().
mom: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
rho: Decay rate. Must be a scalar.
grad: The gradient.
out: Same as "var".
use_locking: If `True`, updating of the var, ms, and mom tensors is protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");
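
The dense RMSProp update documented above can be sketched in NumPy; this is only an illustration of the documented equations under the assumption of dense float arrays, not the registered kernel (which also honors use_locking and backs the sparse/resource variants).

import numpy as np

def rms_prop_step(var, ms, mom, grad, lr, rho, momentum, epsilon):
  """One dense RMSProp step mirroring the documented equations."""
  ms[:] = rho * ms + (1.0 - rho) * grad * grad
  mom[:] = momentum * mom + lr * grad / np.sqrt(ms + epsilon)
  var[:] = var - mom
  return var, ms, mom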

REGISTER_OP("ApplyCenteredRMSProp")
    .Input("var: Ref(T)")
    .Input("mg: Ref(T)")
    .Input("ms: Ref(T)")
    .Input("mom: Ref(T)")
    .Input("lr: T")
    .Input("rho: T")
    .Input("momentum: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyCenteredRMSPropShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the centered RMSProp algorithm.
The centered RMSProp algorithm uses an estimate of the centered second moment
(i.e., the variance) for normalization, as opposed to regular RMSProp, which
uses the (uncentered) second moment. This often helps with training, but is
slightly more expensive in terms of computation and memory.

Note that in dense implementation of this algorithm, mg, ms, and mom will
update even if the grad is zero, but in this sparse implementation, mg, ms,
and mom will not update in iterations during which the grad is zero.

mean_square = decay * mean_square + (1-decay) * gradient ** 2
mean_grad = decay * mean_grad + (1-decay) * gradient

Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)

mg <- rho * mg_{t-1} + (1-rho) * grad
ms <- rho * ms_{t-1} + (1-rho) * grad * grad
mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)
var <- var - mom

var: Should be from a Variable().
mg: Should be from a Variable().
ms: Should be from a Variable().
mom: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
rho: Decay rate. Must be a scalar.
grad: The gradient.
out: Same as "var".
use_locking: If `True`, updating of the var, mg, ms, and mom tensors is
protected by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");
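
The centered variant only adds a running mean of the gradient and subtracts its square inside the square root; a matching NumPy sketch under the same assumptions as before (illustrative names, dense arrays):

import numpy as np

def centered_rms_prop_step(var, mg, ms, mom, grad, lr, rho, momentum, epsilon):
  """One dense centered RMSProp step: normalize by an estimate of the variance."""
  mg[:] = rho * mg + (1.0 - rho) * grad
  ms[:] = rho * ms + (1.0 - rho) * grad * grad
  mom[:] = momentum * mom + lr * grad / np.sqrt(ms - mg * mg + epsilon)
  var[:] = var - mom
  return var, mg, ms, mom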

REGISTER_OP("SparseApplyRMSProp")
    .Input("var: Ref(T)")
    .Input("ms: Ref(T)")
    .Input("mom: Ref(T)")
    .Input("lr: T")
    .Input("rho: T")
    .Input("momentum: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyRMSPropShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the RMSProp algorithm.
Note that in dense implementation of this algorithm, ms and mom will
update even if the grad is zero, but in this sparse implementation, ms
and mom will not update in iterations during which the grad is zero.

mean_square = decay * mean_square + (1-decay) * gradient ** 2
Delta = learning_rate * gradient / sqrt(mean_square + epsilon)

ms <- rho * ms_{t-1} + (1-rho) * grad * grad
mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
var <- var - mom

var: Should be from a Variable().
ms: Should be from a Variable().
mom: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
rho: Decay rate. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var, ms and mom.
out: Same as "var".
use_locking: If `True`, updating of the var, ms, and mom tensors is protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("SparseApplyCenteredRMSProp")
    .Input("var: Ref(T)")
    .Input("mg: Ref(T)")
    .Input("ms: Ref(T)")
    .Input("mom: Ref(T)")
    .Input("lr: T")
    .Input("rho: T")
    .Input("momentum: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyCenteredRMSPropShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the centered RMSProp algorithm.
The centered RMSProp algorithm uses an estimate of the centered second moment
(i.e., the variance) for normalization, as opposed to regular RMSProp, which
uses the (uncentered) second moment. This often helps with training, but is
slightly more expensive in terms of computation and memory.

Note that in dense implementation of this algorithm, mg, ms, and mom will
update even if the grad is zero, but in this sparse implementation, mg, ms,
and mom will not update in iterations during which the grad is zero.

mean_square = decay * mean_square + (1-decay) * gradient ** 2
mean_grad = decay * mean_grad + (1-decay) * gradient
Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)

ms <- rho * ms_{t-1} + (1-rho) * grad * grad
mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
var <- var - mom

var: Should be from a Variable().
mg: Should be from a Variable().
ms: Should be from a Variable().
mom: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
rho: Decay rate. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var, ms and mom.
out: Same as "var".
use_locking: If `True`, updating of the var, mg, ms, and mom tensors is
protected by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("ResourceApplyRMSProp")
    .Input("var: resource")
    .Input("ms: resource")
    .Input("mom: resource")
    .Input("lr: T")
    .Input("rho: T")
    .Input("momentum: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyRMSPropShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the RMSProp algorithm.
Note that in dense implementation of this algorithm, ms and mom will
update even if the grad is zero, but in this sparse implementation, ms
and mom will not update in iterations during which the grad is zero.

mean_square = decay * mean_square + (1-decay) * gradient ** 2
Delta = learning_rate * gradient / sqrt(mean_square + epsilon)

ms <- rho * ms_{t-1} + (1-rho) * grad * grad
mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
var <- var - mom

var: Should be from a Variable().
ms: Should be from a Variable().
mom: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
rho: Decay rate. Must be a scalar.
grad: The gradient.
use_locking: If `True`, updating of the var, ms, and mom tensors is protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("ResourceApplyCenteredRMSProp")
    .Input("var: resource")
    .Input("mg: resource")
    .Input("ms: resource")
    .Input("mom: resource")
    .Input("lr: T")
    .Input("rho: T")
    .Input("momentum: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyCenteredRMSPropShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the centered RMSProp algorithm.
The centered RMSProp algorithm uses an estimate of the centered second moment
(i.e., the variance) for normalization, as opposed to regular RMSProp, which
uses the (uncentered) second moment. This often helps with training, but is
slightly more expensive in terms of computation and memory.

Note that in dense implementation of this algorithm, mg, ms, and mom will
update even if the grad is zero, but in this sparse implementation, mg, ms,
and mom will not update in iterations during which the grad is zero.

mean_square = decay * mean_square + (1-decay) * gradient ** 2
mean_grad = decay * mean_grad + (1-decay) * gradient

Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)

mg <- rho * mg_{t-1} + (1-rho) * grad
ms <- rho * ms_{t-1} + (1-rho) * grad * grad
mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)
var <- var - mom

var: Should be from a Variable().
mg: Should be from a Variable().
ms: Should be from a Variable().
mom: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
rho: Decay rate. Must be a scalar.
grad: The gradient.
use_locking: If `True`, updating of the var, mg, ms, and mom tensors is
protected by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("ResourceSparseApplyRMSProp")
    .Input("var: resource")
    .Input("ms: resource")
    .Input("mom: resource")
    .Input("lr: T")
    .Input("rho: T")
    .Input("momentum: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyRMSPropShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the RMSProp algorithm.
Note that in dense implementation of this algorithm, ms and mom will
update even if the grad is zero, but in this sparse implementation, ms
and mom will not update in iterations during which the grad is zero.

mean_square = decay * mean_square + (1-decay) * gradient ** 2
Delta = learning_rate * gradient / sqrt(mean_square + epsilon)

ms <- rho * ms_{t-1} + (1-rho) * grad * grad
mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
var <- var - mom

var: Should be from a Variable().
ms: Should be from a Variable().
mom: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
rho: Decay rate. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var, ms and mom.
use_locking: If `True`, updating of the var, ms, and mom tensors is protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("ResourceSparseApplyCenteredRMSProp")
    .Input("var: resource")
    .Input("mg: resource")
    .Input("ms: resource")
    .Input("mom: resource")
    .Input("lr: T")
    .Input("rho: T")
    .Input("momentum: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyCenteredRMSPropShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the centered RMSProp algorithm.
The centered RMSProp algorithm uses an estimate of the centered second moment
(i.e., the variance) for normalization, as opposed to regular RMSProp, which
uses the (uncentered) second moment. This often helps with training, but is
slightly more expensive in terms of computation and memory.

Note that in dense implementation of this algorithm, mg, ms, and mom will
update even if the grad is zero, but in this sparse implementation, mg, ms,
and mom will not update in iterations during which the grad is zero.

mean_square = decay * mean_square + (1-decay) * gradient ** 2
mean_grad = decay * mean_grad + (1-decay) * gradient
Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)

ms <- rho * ms_{t-1} + (1-rho) * grad * grad
mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
var <- var - mom

var: Should be from a Variable().
mg: Should be from a Variable().
ms: Should be from a Variable().
mom: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
rho: Decay rate. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var, ms and mom.
use_locking: If `True`, updating of the var, mg, ms, and mom tensors is
protected by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

} // namespace tensorflow

tensorflow/core/ops/training_ops.cc — new file, 1799 lines; file diff suppressed because it is too large.

@ -1,64 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef THIRD_PARTY_TENSORFLOW_CORE_OPS_TRAINING_OPS_H_
#define THIRD_PARTY_TENSORFLOW_CORE_OPS_TRAINING_OPS_H_

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"

namespace tensorflow {

using shape_inference::DimensionHandle;
using shape_inference::InferenceContext;
using shape_inference::ShapeHandle;

static ShapeHandle ShapeOrHandleShape(InferenceContext* c, int input) {
  auto* handle_data = c->input_handle_shapes_and_types(input);
  if (handle_data != nullptr && !handle_data->empty() &&
      (*handle_data)[0].dtype != DT_INVALID) {
    return (*handle_data)[0].shape;
  }
  return c->input(input);
}

// Handle the gradient and, if <sparse>, indices inputs.
// <s> is an input+output parameter, containing the current known input shape to
// the gradient.
static Status HandleGradAndIndicesInputs(InferenceContext* c, bool sparse,
                                         int grad_idx, ShapeHandle* s) {
  ShapeHandle grad = ShapeOrHandleShape(c, grad_idx);
  if (!sparse) {
    TF_RETURN_IF_ERROR(c->Merge(*s, grad, s));
    return Status::OK();
  }
  // Indices is a vector where indices.dim[0].rank == grad[0].rank.
  ShapeHandle indices;
  TF_RETURN_IF_ERROR(c->WithRank(c->input(grad_idx + 1), 1, &indices));
  DimensionHandle unused;
  TF_RETURN_IF_ERROR(c->Merge(c->Dim(indices, 0), c->Dim(grad, 0), &unused));

  // Trailing part of grad matches trailing part of *s.
  ShapeHandle grad_unknown_first;
  TF_RETURN_IF_ERROR(
      c->ReplaceDim(grad, 0, c->UnknownDim(), &grad_unknown_first));
  TF_RETURN_IF_ERROR(c->Merge(*s, grad_unknown_first, s));

  return Status::OK();
}

} // namespace tensorflow

#endif  // THIRD_PARTY_TENSORFLOW_CORE_OPS_TRAINING_OPS_H_
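
The helper above encodes the shape contract shared by all of the sparse training ops: indices must be a vector, its length must equal the first dimension of grad, and the trailing dimensions of grad must match the variable. A NumPy-level sketch of that contract, for illustration only (check_sparse_grad_shapes is a hypothetical helper, not part of the codebase):

import numpy as np

def check_sparse_grad_shapes(var, grad, indices):
  """Illustrative mirror of the constraint enforced by HandleGradAndIndicesInputs."""
  assert indices.ndim == 1, "indices must be a vector"
  assert grad.shape[0] == indices.shape[0], "one gradient slice per index"
  assert grad.shape[1:] == var.shape[1:], "trailing dims of grad must match var"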

@ -65,16 +65,7 @@ tf_java_op_gen_srcjar(
"sparse_ops",
"state_ops",
"string_ops",
"adadelta_ops",
"adagrad_da_ops",
"adagrad_ops",
"adam_ops",
"ftrl_ops",
"momentum_ops",
"gradient_descent_ops",
"proximal_adagrad_ops",
"proximal_gradient_descent_ops",
"rms_prop_ops",
"training_ops",
"user_ops",
],
)

@ -1363,53 +1363,8 @@ tf_gen_op_wrapper_private_py(
)

tf_gen_op_wrapper_private_py(
    name = "adagrad_ops_gen",
    out = "training/gen_adagrad_ops.py",
)

tf_gen_op_wrapper_private_py(
    name = "adagrad_da_ops_gen",
    out = "training/gen_adagrad_da_ops.py",
)

tf_gen_op_wrapper_private_py(
    name = "adadelta_ops_gen",
    out = "training/gen_adadelta_ops.py",
)

tf_gen_op_wrapper_private_py(
    name = "adam_ops_gen",
    out = "training/gen_adam_ops.py",
)

tf_gen_op_wrapper_private_py(
    name = "ftrl_ops_gen",
    out = "training/gen_ftrl_ops.py",
)

tf_gen_op_wrapper_private_py(
    name = "gradient_descent_ops_gen",
    out = "training/gen_gradient_descent_ops.py",
)

tf_gen_op_wrapper_private_py(
    name = "momentum_ops_gen",
    out = "training/gen_momentum_ops.py",
)

tf_gen_op_wrapper_private_py(
    name = "proximal_adagrad_ops_gen",
    out = "training/gen_proximal_adagrad_ops.py",
)

tf_gen_op_wrapper_private_py(
    name = "proximal_gradient_descent_ops_gen",
    out = "training/gen_proximal_gradient_descent_ops.py",
)

tf_gen_op_wrapper_private_py(
    name = "rms_prop_ops_gen",
    out = "training/gen_rms_prop_ops.py",
    name = "training_ops_gen",
    out = "training/gen_training_ops.py",
)

py_library(

@ -2640,10 +2595,6 @@ py_library(
    ),
    srcs_version = "PY2AND3",
    deps = [
        ":adadelta_ops_gen",
        ":adagrad_da_ops_gen",
        ":adagrad_ops_gen",
        ":adam_ops_gen",
        ":array_ops",
        ":checkpoint_ops_gen",
        ":client",

@ -2652,8 +2603,6 @@ py_library(
        ":errors",
        ":framework",
        ":framework_for_generated_wrappers",
        ":ftrl_ops_gen",
        ":gradient_descent_ops_gen",
        ":gradients",
        ":init_ops",
        ":io_ops",

@ -2661,21 +2610,18 @@ py_library(
        ":lib",
        ":lookup_ops",
        ":math_ops",
        ":momentum_ops_gen",
        ":platform",
        ":protos_all_py",
        ":proximal_adagrad_ops_gen",
        ":proximal_gradient_descent_ops_gen",
        ":pywrap_tensorflow",
        ":random_ops",
        ":resource_variable_ops",
        ":resources",
        ":rms_prop_ops_gen",
        ":sdca_ops",
        ":sparse_ops",
        ":state_ops",
        ":string_ops",
        ":summary",
        ":training_ops_gen",
        ":util",
        ":variable_scope",
        ":variables",

@ -19,16 +19,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.training import gen_training_ops
# go/tf-wildcard-import
# pylint: disable=wildcard-import
from tensorflow.python.training.gen_adadelta_ops import *
from tensorflow.python.training.gen_adagrad_da_ops import *
from tensorflow.python.training.gen_adagrad_ops import *
from tensorflow.python.training.gen_adam_ops import *
from tensorflow.python.training.gen_ftrl_ops import *
from tensorflow.python.training.gen_gradient_descent_ops import *
from tensorflow.python.training.gen_momentum_ops import *
from tensorflow.python.training.gen_proximal_adagrad_ops import *
from tensorflow.python.training.gen_proximal_gradient_descent_ops import *
from tensorflow.python.training.gen_rms_prop_ops import *
from tensorflow.python.training.gen_training_ops import *
# pylint: enable=wildcard-import

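After the rollback, Python call sites import the single generated module gen_training_ops rather than one generated module per optimizer. A hedged sketch of a call into the consolidated wrapper (apply_gradient_descent is the generated snake_case form of the ApplyGradientDescent op; treat the exact keyword names here as illustrative):

from tensorflow.python.training import gen_training_ops

def sgd_update_op(var, learning_rate, grad):
  """Builds the raw generated op that applies one in-place SGD step to `var`."""
  return gen_training_ops.apply_gradient_descent(
      var, alpha=learning_rate, delta=grad, use_locking=False)
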
@ -196,7 +196,7 @@ def tf_opts_nortti_if_android():

# Given a list of "op_lib_names" (a list of files in the ops directory
# without their .cc extensions), generate a library for that file.
def tf_gen_op_libs(op_lib_names, deps=None, extra_srcs=[]):
def tf_gen_op_libs(op_lib_names, deps=None):
  # Make library out of each op so it can also be used to generate wrappers
  # for various languages.
  if not deps:

@ -205,7 +205,7 @@ def tf_gen_op_libs(op_lib_names, deps=None, extra_srcs=[]):
    native.cc_library(
        name=n + "_op_lib",
        copts=tf_copts(),
        srcs=extra_srcs + ["ops/" + n + ".cc"],
        srcs=["ops/" + n + ".cc"],
        deps=deps + [clean_dep("//tensorflow/core:framework")],
        visibility=["//visibility:public"],
        alwayslink=1,
