Automated g4 rollback of changelist 168576795

PiperOrigin-RevId: 168606478
Olivia Nordquist 2017-09-13 15:40:28 -07:00 committed by TensorFlower Gardener
parent 78f1dd5031
commit f95b1cf115
20 changed files with 1812 additions and 2172 deletions


@@ -416,16 +416,7 @@ tf_gen_op_wrappers_cc(
"sparse_ops",
"state_ops",
"string_ops",
"adadelta_ops",
"adagrad_da_ops",
"adagrad_ops",
"adam_ops",
"ftrl_ops",
"momentum_ops",
"gradient_descent_ops",
"proximal_adagrad_ops",
"proximal_gradient_descent_ops",
"rms_prop_ops",
"training_ops",
"user_ops",
],
other_hdrs = [


@@ -16,34 +16,25 @@ limitations under the License.
#ifndef THIRD_PARTY_TENSORFLOW_CC_OPS_STANDARD_OPS_H_
#define THIRD_PARTY_TENSORFLOW_CC_OPS_STANDARD_OPS_H_
#include "tensorflow/cc/ops/adadelta_ops.h"
#include "tensorflow/cc/ops/adagrad_da_ops.h"
#include "tensorflow/cc/ops/adagrad_ops.h"
#include "tensorflow/cc/ops/adam_ops.h"
#include "tensorflow/cc/ops/array_ops.h"
#include "tensorflow/cc/ops/candidate_sampling_ops.h"
#include "tensorflow/cc/ops/const_op.h"
#include "tensorflow/cc/ops/control_flow_ops.h"
#include "tensorflow/cc/ops/data_flow_ops.h"
#include "tensorflow/cc/ops/ftrl_ops.h"
#include "tensorflow/cc/ops/gradient_descent_ops.h"
#include "tensorflow/cc/ops/image_ops.h"
#include "tensorflow/cc/ops/io_ops.h"
#include "tensorflow/cc/ops/linalg_ops.h"
#include "tensorflow/cc/ops/logging_ops.h"
#include "tensorflow/cc/ops/lookup_ops.h"
#include "tensorflow/cc/ops/math_ops.h"
#include "tensorflow/cc/ops/momentum_ops.h"
#include "tensorflow/cc/ops/nn_ops.h"
#include "tensorflow/cc/ops/no_op.h"
#include "tensorflow/cc/ops/parsing_ops.h"
#include "tensorflow/cc/ops/proximal_adagrad_ops.h"
#include "tensorflow/cc/ops/proximal_gradient_descent_ops.h"
#include "tensorflow/cc/ops/random_ops.h"
#include "tensorflow/cc/ops/rms_prop_ops.h"
#include "tensorflow/cc/ops/sparse_ops.h"
#include "tensorflow/cc/ops/state_ops.h"
#include "tensorflow/cc/ops/string_ops.h"
#include "tensorflow/cc/ops/training_ops.h"
#include "tensorflow/cc/ops/user_ops.h"
#endif // THIRD_PARTY_TENSORFLOW_CC_OPS_STANDARD_OPS_H_
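
For orientation, a minimal client-side sketch of the kind of code this aggregate header serves, using the generated C++ wrappers through ClientSession; the shapes, values, and exact wrapper signatures here are assumptions for illustration, not part of this change:

#include <vector>

#include "tensorflow/cc/client/client_session.h"
#include "tensorflow/cc/ops/standard_ops.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/lib/core/status.h"

int main() {
  using namespace tensorflow;  // brevity for the sketch only
  Scope root = Scope::NewRootScope();
  // A 2-element float variable initialized to {1, 2}.
  auto var = ops::Variable(root, {2}, DT_FLOAT);
  auto init = ops::Assign(root, var, ops::Const(root, {1.0f, 2.0f}));
  // One plain gradient-descent step: var -= alpha * delta.
  auto step = ops::ApplyGradientDescent(root, var, 0.1f,
                                        ops::Const(root, {0.5f, 0.5f}));
  ClientSession session(root);
  std::vector<Tensor> out;
  TF_CHECK_OK(session.Run({init}, &out));
  TF_CHECK_OK(session.Run({step}, &out));  // out[0] holds the updated var
  return 0;
}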


@@ -535,7 +535,6 @@ cc_library(
# Generates library per group of ops.
tf_gen_op_libs(
extra_srcs = ["ops/training_ops.h"],
op_lib_names = [
"bitwise_ops",
"candidate_sampling_ops",
@@ -568,16 +567,7 @@ tf_gen_op_libs(
"stateless_random_ops",
"string_ops",
"summary_ops",
"adadelta_ops",
"adagrad_da_ops",
"adagrad_ops",
"adam_ops",
"ftrl_ops",
"momentum_ops",
"gradient_descent_ops",
"proximal_adagrad_ops",
"proximal_gradient_descent_ops",
"rms_prop_ops",
"training_ops",
],
)
@@ -655,16 +645,7 @@ cc_library(
":state_ops_op_lib",
":stateless_random_ops_op_lib",
":string_ops_op_lib",
":adadelta_ops_op_lib",
":adagrad_da_ops_op_lib",
":adagrad_ops_op_lib",
":adam_ops_op_lib",
":ftrl_ops_op_lib",
":momentum_ops_op_lib",
":gradient_descent_ops_op_lib",
":proximal_adagrad_ops_op_lib",
":proximal_gradient_descent_ops_op_lib",
":rms_prop_ops_op_lib",
":training_ops_op_lib",
":user_ops_op_lib",
":word2vec_ops",
] + tf_additional_cloud_op_deps(),


@@ -3888,18 +3888,9 @@ tf_kernel_library(
":bounds_check",
":training_op_helpers",
":variable_ops",
"//tensorflow/core:adadelta_ops_op_lib",
"//tensorflow/core:adagrad_da_ops_op_lib",
"//tensorflow/core:adagrad_ops_op_lib",
"//tensorflow/core:adam_ops_op_lib",
"//tensorflow/core:framework",
"//tensorflow/core:ftrl_ops_op_lib",
"//tensorflow/core:gradient_descent_ops_op_lib",
"//tensorflow/core:lib",
"//tensorflow/core:momentum_ops_op_lib",
"//tensorflow/core:proximal_adagrad_ops_op_lib",
"//tensorflow/core:proximal_gradient_descent_ops_op_lib",
"//tensorflow/core:rms_prop_ops_op_lib",
"//tensorflow/core:training_ops_op_lib",
"//third_party/eigen3",
],
)


@@ -1,163 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"
namespace tensorflow {
static Status ApplyAdadeltaShapeFn(InferenceContext* c, bool sparse) {
ShapeHandle unused;
ShapeHandle s = ShapeOrHandleShape(c, 0); // var
TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s)); // accum
TF_RETURN_IF_ERROR(
c->Merge(s, ShapeOrHandleShape(c, 2), &s)); // accum update
TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); // lr
TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); // rho
TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused)); // epsilon
TF_RETURN_IF_ERROR(
HandleGradAndIndicesInputs(c, sparse, 6 /* grad_idx */, &s));
if (c->num_outputs() > 0) {
c->set_output(0, s);
}
return Status::OK();
}
REGISTER_OP("ApplyAdadelta")
.Input("var: Ref(T)")
.Input("accum: Ref(T)")
.Input("accum_update: Ref(T)")
.Input("lr: T")
.Input("rho: T")
.Input("epsilon: T")
.Input("grad: T")
.Output("out: Ref(T)")
.Attr("T: numbertype")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyAdadeltaShapeFn(c, false /* sparse */);
})
.Doc(R"doc(
Update '*var' according to the adadelta scheme.
accum = rho() * accum + (1 - rho()) * grad.square();
update = (update_accum + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad;
update_accum = rho() * update_accum + (1 - rho()) * update.square();
var -= update;
var: Should be from a Variable().
accum: Should be from a Variable().
accum_update: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
rho: Decay factor. Must be a scalar.
epsilon: Constant factor. Must be a scalar.
grad: The gradient.
out: Same as "var".
use_locking: If True, updating of the var, accum and update_accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");
REGISTER_OP("SparseApplyAdadelta")
.Input("var: Ref(T)")
.Input("accum: Ref(T)")
.Input("accum_update: Ref(T)")
.Input("lr: T")
.Input("rho: T")
.Input("epsilon: T")
.Input("grad: T")
.Input("indices: Tindices")
.Output("out: Ref(T)")
.Attr("T: numbertype")
.Attr("Tindices: {int32, int64}")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyAdadeltaShapeFn(c, true /* sparse */);
})
.Doc(R"doc(
var: Should be from a Variable().
accum: Should be from a Variable().
accum_update: Should be from a Variable().
lr: Learning rate. Must be a scalar.
rho: Decay factor. Must be a scalar.
epsilon: Constant factor. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
out: Same as "var".
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");
REGISTER_OP("ResourceApplyAdadelta")
.Input("var: resource")
.Input("accum: resource")
.Input("accum_update: resource")
.Input("lr: T")
.Input("rho: T")
.Input("epsilon: T")
.Input("grad: T")
.Attr("T: numbertype")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyAdadeltaShapeFn(c, false /* sparse */);
})
.Doc(R"doc(
Update '*var' according to the adadelta scheme.
accum = rho() * accum + (1 - rho()) * grad.square();
update = (update_accum + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad;
update_accum = rho() * update_accum + (1 - rho()) * update.square();
var -= update;
var: Should be from a Variable().
accum: Should be from a Variable().
accum_update: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
rho: Decay factor. Must be a scalar.
epsilon: Constant factor. Must be a scalar.
grad: The gradient.
use_locking: If True, updating of the var, accum and update_accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");
REGISTER_OP("ResourceSparseApplyAdadelta")
.Input("var: resource")
.Input("accum: resource")
.Input("accum_update: resource")
.Input("lr: T")
.Input("rho: T")
.Input("epsilon: T")
.Input("grad: T")
.Input("indices: Tindices")
.Attr("T: numbertype")
.Attr("Tindices: {int32, int64}")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyAdadeltaShapeFn(c, true /* sparse */);
})
.Doc(R"doc(
var: Should be from a Variable().
accum: Should be from a Variable().
accum_update: Should be from a Variable().
lr: Learning rate. Must be a scalar.
rho: Decay factor. Must be a scalar.
epsilon: Constant factor. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");
} // namespace tensorflow
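
For reference, an element-wise C++ sketch of the dense Adadelta step documented above. It is illustrative only (the real kernels are Eigen-based and live in the consolidated training_ops.cc), and it assumes lr scales the computed update, per the op's "Scaling factor" input:

#include <cmath>
#include <vector>

// Illustrative dense Adadelta step matching the doc string above.
void AdadeltaStep(std::vector<float>& var, std::vector<float>& accum,
                  std::vector<float>& accum_update, float lr, float rho,
                  float epsilon, const std::vector<float>& grad) {
  for (std::size_t i = 0; i < var.size(); ++i) {
    accum[i] = rho * accum[i] + (1.0f - rho) * grad[i] * grad[i];
    const float update = std::sqrt(accum_update[i] + epsilon) /
                         std::sqrt(accum[i] + epsilon) * grad[i];
    accum_update[i] = rho * accum_update[i] + (1.0f - rho) * update * update;
    var[i] -= lr * update;  // lr used as the op's scaling factor (assumption)
  }
}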


@@ -1,168 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"
namespace tensorflow {
static Status ApplyAdagradDAShapeFn(InferenceContext* c, bool sparse) {
ShapeHandle unused;
ShapeHandle s = ShapeOrHandleShape(c, 0); // var
TF_RETURN_IF_ERROR(
c->Merge(s, ShapeOrHandleShape(c, 1), &s)); // grad_accumulator
TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 2),
&s)); // gradient_squared_accumulator
TF_RETURN_IF_ERROR(
HandleGradAndIndicesInputs(c, sparse, 3 /* grad_idx */, &s));
int idx = sparse ? 5 : 4;
TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused)); // lr
TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused)); // l1
TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused)); // l2
TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused)); // global step
if (c->num_outputs() > 0) {
c->set_output(0, s);
}
return Status::OK();
}
REGISTER_OP("ApplyAdagradDA")
.Input("var: Ref(T)")
.Input("gradient_accumulator: Ref(T)")
.Input("gradient_squared_accumulator: Ref(T)")
.Input("grad: T")
.Input("lr: T")
.Input("l1: T")
.Input("l2: T")
.Input("global_step: int64")
.Output("out: Ref(T)")
.Attr("T: numbertype")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyAdagradDAShapeFn(c, false /* sparse */);
})
.Doc(R"doc(
Update '*var' according to the proximal adagrad scheme.
var: Should be from a Variable().
gradient_accumulator: Should be from a Variable().
gradient_squared_accumulator: Should be from a Variable().
grad: The gradient.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
global_step: Training step number. Must be a scalar.
out: Same as "var".
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");
REGISTER_OP("SparseApplyAdagradDA")
.Input("var: Ref(T)")
.Input("gradient_accumulator: Ref(T)")
.Input("gradient_squared_accumulator: Ref(T)")
.Input("grad: T")
.Input("indices: Tindices")
.Input("lr: T")
.Input("l1: T")
.Input("l2: T")
.Input("global_step: int64")
.Output("out: Ref(T)")
.Attr("T: numbertype")
.Attr("Tindices: {int32, int64}")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyAdagradDAShapeFn(c, true /* sparse */);
})
.Doc(R"doc(
Update entries in '*var' and '*accum' according to the proximal adagrad scheme.
var: Should be from a Variable().
gradient_accumulator: Should be from a Variable().
gradient_squared_accumulator: Should be from a Variable().
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
lr: Learning rate. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
global_step: Training step number. Must be a scalar.
out: Same as "var".
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");
REGISTER_OP("ResourceApplyAdagradDA")
.Input("var: resource")
.Input("gradient_accumulator: resource")
.Input("gradient_squared_accumulator: resource")
.Input("grad: T")
.Input("lr: T")
.Input("l1: T")
.Input("l2: T")
.Input("global_step: int64")
.Attr("T: numbertype")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyAdagradDAShapeFn(c, false /* sparse */);
})
.Doc(R"doc(
Update '*var' according to the proximal adagrad scheme.
var: Should be from a Variable().
gradient_accumulator: Should be from a Variable().
gradient_squared_accumulator: Should be from a Variable().
grad: The gradient.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
global_step: Training step number. Must be a scalar.
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");
REGISTER_OP("ResourceSparseApplyAdagradDA")
.Input("var: resource")
.Input("gradient_accumulator: resource")
.Input("gradient_squared_accumulator: resource")
.Input("grad: T")
.Input("indices: Tindices")
.Input("lr: T")
.Input("l1: T")
.Input("l2: T")
.Input("global_step: int64")
.Attr("T: numbertype")
.Attr("Tindices: {int32, int64}")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyAdagradDAShapeFn(c, true /* sparse */);
})
.Doc(R"doc(
Update entries in '*var' and '*accum' according to the proximal adagrad scheme.
var: Should be from a Variable().
gradient_accumulator: Should be from a Variable().
gradient_squared_accumulator: Should be from a Variable().
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
lr: Learning rate. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
global_step: Training step number. Must be a scalar.
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");
} // namespace tensorflow


@@ -1,147 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"
namespace tensorflow {
static Status ApplyAdagradShapeFn(InferenceContext* c, bool sparse) {
ShapeHandle unused;
ShapeHandle s = ShapeOrHandleShape(c, 0); // var
TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s)); // accum
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); // lr
TF_RETURN_IF_ERROR(
HandleGradAndIndicesInputs(c, sparse, 3 /* grad_idx */, &s));
if (c->num_outputs() > 0) {
c->set_output(0, s);
}
return Status::OK();
}
REGISTER_OP("ApplyAdagrad")
.Input("var: Ref(T)")
.Input("accum: Ref(T)")
.Input("lr: T")
.Input("grad: T")
.Output("out: Ref(T)")
.Attr("T: numbertype")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyAdagradShapeFn(c, false /* sparse */);
})
.Doc(R"doc(
Update '*var' according to the adagrad scheme.
accum += grad * grad
var -= lr * grad * (1 / sqrt(accum))
var: Should be from a Variable().
accum: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
grad: The gradient.
out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");
REGISTER_OP("ResourceApplyAdagrad")
.Input("var: resource")
.Input("accum: resource")
.Input("lr: T")
.Input("grad: T")
.Attr("T: numbertype")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyAdagradShapeFn(c, false /* sparse */);
})
.Doc(R"doc(
Update '*var' according to the adagrad scheme.
accum += grad * grad
var -= lr * grad * (1 / sqrt(accum))
var: Should be from a Variable().
accum: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
grad: The gradient.
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");
REGISTER_OP("SparseApplyAdagrad")
.Input("var: Ref(T)")
.Input("accum: Ref(T)")
.Input("lr: T")
.Input("grad: T")
.Input("indices: Tindices")
.Output("out: Ref(T)")
.Attr("T: numbertype")
.Attr("Tindices: {int32, int64}")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyAdagradShapeFn(c, true /* sparse */);
})
.Doc(R"doc(
Update relevant entries in '*var' and '*accum' according to the adagrad scheme.
That is for rows we have grad for, we update var and accum as follows:
accum += grad * grad
var -= lr * grad * (1 / sqrt(accum))
var: Should be from a Variable().
accum: Should be from a Variable().
lr: Learning rate. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");
REGISTER_OP("ResourceSparseApplyAdagrad")
.Input("var: resource")
.Input("accum: resource")
.Input("lr: T")
.Input("grad: T")
.Input("indices: Tindices")
.Attr("T: numbertype")
.Attr("Tindices: {int32, int64}")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyAdagradShapeFn(c, true /* sparse */);
})
.Doc(R"doc(
Update relevant entries in '*var' and '*accum' according to the adagrad scheme.
That is for rows we have grad for, we update var and accum as follows:
accum += grad * grad
var -= lr * grad * (1 / sqrt(accum))
var: Should be from a Variable().
accum: Should be from a Variable().
lr: Learning rate. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");
} // namespace tensorflow
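
An element-wise C++ sketch of the dense Adagrad rule from the doc strings above, illustrative only:

#include <cmath>
#include <vector>

// Illustrative dense Adagrad step: accum += grad^2; var -= lr * grad / sqrt(accum).
void AdagradStep(std::vector<float>& var, std::vector<float>& accum, float lr,
                 const std::vector<float>& grad) {
  for (std::size_t i = 0; i < var.size(); ++i) {
    accum[i] += grad[i] * grad[i];
    var[i] -= lr * grad[i] / std::sqrt(accum[i]);
  }
}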


@@ -1,125 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"
namespace tensorflow {
static Status ApplyAdamShapeFn(InferenceContext* c, bool sparse) {
ShapeHandle unused;
ShapeHandle s = ShapeOrHandleShape(c, 0); // var
TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s)); // m
TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 2), &s)); // v
TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); // beta1_power
TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); // beta2_power
TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused)); // lr
TF_RETURN_IF_ERROR(c->WithRank(c->input(6), 0, &unused)); // beta1
TF_RETURN_IF_ERROR(c->WithRank(c->input(7), 0, &unused)); // beta2
TF_RETURN_IF_ERROR(c->WithRank(c->input(8), 0, &unused)); // epsilon
TF_RETURN_IF_ERROR(
HandleGradAndIndicesInputs(c, sparse, 9 /* grad_idx */, &s));
if (c->num_outputs() > 0) {
c->set_output(0, s);
}
return Status::OK();
}
REGISTER_OP("ApplyAdam")
.Input("var: Ref(T)")
.Input("m: Ref(T)")
.Input("v: Ref(T)")
.Input("beta1_power: T")
.Input("beta2_power: T")
.Input("lr: T")
.Input("beta1: T")
.Input("beta2: T")
.Input("epsilon: T")
.Input("grad: T")
.Output("out: Ref(T)")
.Attr("T: numbertype")
.Attr("use_locking: bool = false")
.Attr("use_nesterov: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyAdamShapeFn(c, false /* sparse */);
})
.Doc(R"doc(
Update '*var' according to the Adam algorithm.
lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)
m_t <- beta1 * m_{t-1} + (1 - beta1) * g_t
v_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t
variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)
var: Should be from a Variable().
m: Should be from a Variable().
v: Should be from a Variable().
beta1_power: Must be a scalar.
beta2_power: Must be a scalar.
lr: Scaling factor. Must be a scalar.
beta1: Momentum factor. Must be a scalar.
beta2: Momentum factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
grad: The gradient.
out: Same as "var".
use_locking: If `True`, updating of the var, m, and v tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
use_nesterov: If `True`, uses the nesterov update.
)doc");
REGISTER_OP("ResourceApplyAdam")
.Input("var: resource")
.Input("m: resource")
.Input("v: resource")
.Input("beta1_power: T")
.Input("beta2_power: T")
.Input("lr: T")
.Input("beta1: T")
.Input("beta2: T")
.Input("epsilon: T")
.Input("grad: T")
.Attr("T: numbertype")
.Attr("use_locking: bool = false")
.Attr("use_nesterov: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyAdamShapeFn(c, false /* sparse */);
})
.Doc(R"doc(
Update '*var' according to the Adam algorithm.
lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)
m_t <- beta1 * m_{t-1} + (1 - beta1) * g_t
v_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t
variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)
var: Should be from a Variable().
m: Should be from a Variable().
v: Should be from a Variable().
beta1_power: Must be a scalar.
beta2_power: Must be a scalar.
lr: Scaling factor. Must be a scalar.
beta1: Momentum factor. Must be a scalar.
beta2: Momentum factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
grad: The gradient.
use_locking: If `True`, updating of the var, m, and v tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
use_nesterov: If `True`, uses the nesterov update.
)doc");
} // namespace tensorflow
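
An element-wise C++ sketch of the dense Adam step described above; the use_nesterov variant is omitted, and beta1_power/beta2_power are the caller-supplied beta1^t and beta2^t:

#include <cmath>
#include <vector>

// Illustrative dense Adam step following the doc string above.
void AdamStep(std::vector<float>& var, std::vector<float>& m,
              std::vector<float>& v, float beta1_power, float beta2_power,
              float lr, float beta1, float beta2, float epsilon,
              const std::vector<float>& grad) {
  const float lr_t = lr * std::sqrt(1.0f - beta2_power) / (1.0f - beta1_power);
  for (std::size_t i = 0; i < var.size(); ++i) {
    m[i] = beta1 * m[i] + (1.0f - beta1) * grad[i];
    v[i] = beta2 * v[i] + (1.0f - beta2) * grad[i] * grad[i];
    var[i] -= lr_t * m[i] / (std::sqrt(v[i]) + epsilon);
  }
}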


@@ -1,368 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"
namespace tensorflow {
static Status ApplyFtrlShapeFn(InferenceContext* c, bool sparse) {
ShapeHandle unused;
ShapeHandle s = ShapeOrHandleShape(c, 0); // var
TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s)); // accum
TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 2), &s)); // linear
TF_RETURN_IF_ERROR(
HandleGradAndIndicesInputs(c, sparse, 3 /* grad_idx */, &s));
int idx = sparse ? 5 : 4;
TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused)); // lr
TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused)); // l1
TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused)); // l2
TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused)); // lr_power
if (c->num_outputs() > 0) {
c->set_output(0, s);
}
return Status::OK();
}
REGISTER_OP("ApplyFtrl")
.Input("var: Ref(T)")
.Input("accum: Ref(T)")
.Input("linear: Ref(T)")
.Input("grad: T")
.Input("lr: T")
.Input("l1: T")
.Input("l2: T")
.Input("lr_power: T")
.Output("out: Ref(T)")
.Attr("T: numbertype")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyFtrlShapeFn(c, false /* sparse */);
})
.Doc(R"doc(
Update '*var' according to the Ftrl-proximal scheme.
accum_new = accum + grad * grad
linear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
accum = accum_new
var: Should be from a Variable().
accum: Should be from a Variable().
linear: Should be from a Variable().
grad: The gradient.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
lr_power: Scaling factor. Must be a scalar.
out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");
REGISTER_OP("SparseApplyFtrl")
.Input("var: Ref(T)")
.Input("accum: Ref(T)")
.Input("linear: Ref(T)")
.Input("grad: T")
.Input("indices: Tindices")
.Input("lr: T")
.Input("l1: T")
.Input("l2: T")
.Input("lr_power: T")
.Output("out: Ref(T)")
.Attr("T: numbertype")
.Attr("Tindices: {int32, int64}")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyFtrlShapeFn(c, true /* sparse */);
})
.Doc(R"doc(
Update relevant entries in '*var' according to the Ftrl-proximal scheme.
That is for rows we have grad for, we update var, accum and linear as follows:
accum_new = accum + grad * grad
linear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
accum = accum_new
var: Should be from a Variable().
accum: Should be from a Variable().
linear: Should be from a Variable().
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
lr_power: Scaling factor. Must be a scalar.
out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");
REGISTER_OP("ResourceApplyFtrl")
.Input("var: resource")
.Input("accum: resource")
.Input("linear: resource")
.Input("grad: T")
.Input("lr: T")
.Input("l1: T")
.Input("l2: T")
.Input("lr_power: T")
.Attr("T: numbertype")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyFtrlShapeFn(c, false /* sparse */);
})
.Doc(R"doc(
Update '*var' according to the Ftrl-proximal scheme.
accum_new = accum + grad * grad
linear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
accum = accum_new
var: Should be from a Variable().
accum: Should be from a Variable().
linear: Should be from a Variable().
grad: The gradient.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
lr_power: Scaling factor. Must be a scalar.
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");
REGISTER_OP("ResourceSparseApplyFtrl")
.Input("var: resource")
.Input("accum: resource")
.Input("linear: resource")
.Input("grad: T")
.Input("indices: Tindices")
.Input("lr: T")
.Input("l1: T")
.Input("l2: T")
.Input("lr_power: T")
.Attr("T: numbertype")
.Attr("Tindices: {int32, int64}")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyFtrlShapeFn(c, true /* sparse */);
})
.Doc(R"doc(
Update relevant entries in '*var' according to the Ftrl-proximal scheme.
That is for rows we have grad for, we update var, accum and linear as follows:
accum_new = accum + grad * grad
linear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
accum = accum_new
var: Should be from a Variable().
accum: Should be from a Variable().
linear: Should be from a Variable().
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
lr_power: Scaling factor. Must be a scalar.
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");
REGISTER_OP("ApplyFtrlV2")
.Input("var: Ref(T)")
.Input("accum: Ref(T)")
.Input("linear: Ref(T)")
.Input("grad: T")
.Input("lr: T")
.Input("l1: T")
.Input("l2: T")
.Input("l2_shrinkage: T")
.Input("lr_power: T")
.Output("out: Ref(T)")
.Attr("T: numbertype")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyFtrlShapeFn(c, false /* sparse */);
})
.Doc(R"doc(
Update '*var' according to the Ftrl-proximal scheme.
grad_with_shrinkage = grad + 2 * l2_shrinkage * var
accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
linear += grad_with_shrinkage +
(accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
accum = accum_new
var: Should be from a Variable().
accum: Should be from a Variable().
linear: Should be from a Variable().
grad: The gradient.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: online L2 regularization. Must be a scalar.
l2_shrinkage: L2 shrinkage regularization. Must be a scalar.
lr_power: Scaling factor. Must be a scalar.
out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");
REGISTER_OP("SparseApplyFtrlV2")
.Input("var: Ref(T)")
.Input("accum: Ref(T)")
.Input("linear: Ref(T)")
.Input("grad: T")
.Input("indices: Tindices")
.Input("lr: T")
.Input("l1: T")
.Input("l2: T")
.Input("l2_shrinkage: T")
.Input("lr_power: T")
.Output("out: Ref(T)")
.Attr("T: numbertype")
.Attr("Tindices: {int32, int64}")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyFtrlShapeFn(c, true /* sparse */);
})
.Doc(R"doc(
Update relevant entries in '*var' according to the Ftrl-proximal scheme.
That is for rows we have grad for, we update var, accum and linear as follows:
grad_with_shrinkage = grad + 2 * l2_shrinkage * var
accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
linear += grad_with_shrinkage +
(accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
accum = accum_new
var: Should be from a Variable().
accum: Should be from a Variable().
linear: Should be from a Variable().
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: online L2 regularization. Must be a scalar.
l2_shrinkage: L2 shrinkage regularization. Must be a scalar.
lr_power: Scaling factor. Must be a scalar.
out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");
REGISTER_OP("ResourceApplyFtrlV2")
.Input("var: resource")
.Input("accum: resource")
.Input("linear: resource")
.Input("grad: T")
.Input("lr: T")
.Input("l1: T")
.Input("l2: T")
.Input("l2_shrinkage: T")
.Input("lr_power: T")
.Attr("T: numbertype")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyFtrlShapeFn(c, false /* sparse */);
})
.Doc(R"doc(
Update '*var' according to the Ftrl-proximal scheme.
grad_with_shrinkage = grad + 2 * l2_shrinkage * var
accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
linear += grad_with_shrinkage +
(accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
accum = accum_new
var: Should be from a Variable().
accum: Should be from a Variable().
linear: Should be from a Variable().
grad: The gradient.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: online L2 regularization. Must be a scalar.
l2_shrinkage: L2 shrinkage regularization. Must be a scalar.
lr_power: Scaling factor. Must be a scalar.
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");
REGISTER_OP("ResourceSparseApplyFtrlV2")
.Input("var: resource")
.Input("accum: resource")
.Input("linear: resource")
.Input("grad: T")
.Input("indices: Tindices")
.Input("lr: T")
.Input("l1: T")
.Input("l2: T")
.Input("l2_shrinkage: T")
.Input("lr_power: T")
.Attr("T: numbertype")
.Attr("Tindices: {int32, int64}")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyFtrlShapeFn(c, true /* sparse */);
})
.Doc(R"doc(
Update relevant entries in '*var' according to the Ftrl-proximal scheme.
That is for rows we have grad for, we update var, accum and linear as follows:
grad_with_shrinkage = grad + 2 * l2_shrinkage * var
accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
linear += grad_with_shrinkage +
(accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
accum = accum_new
var: Should be from a Variable().
accum: Should be from a Variable().
linear: Should be from a Variable().
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: online L2 regularization. Must be a scalar.
l2_shrinkage: L2 shrinkage regularization. Must be a scalar.
lr_power: Scaling factor. Must be a scalar.
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");
} // namespace tensorflow
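
An element-wise C++ sketch of the dense FTRL-proximal step; the sign of the accumulator correction in the linear term follows the ResourceApplyFtrl doc string above (the doc strings in this file are not fully consistent on it), and the l2_shrinkage variant is omitted:

#include <cmath>
#include <vector>

// Illustrative dense FTRL-proximal step (lr_power is typically -0.5).
void FtrlStep(std::vector<float>& var, std::vector<float>& accum,
              std::vector<float>& linear, const std::vector<float>& grad,
              float lr, float l1, float l2, float lr_power) {
  for (std::size_t i = 0; i < var.size(); ++i) {
    const float accum_new = accum[i] + grad[i] * grad[i];
    linear[i] += grad[i] - (std::pow(accum_new, -lr_power) -
                            std::pow(accum[i], -lr_power)) / lr * var[i];
    const float quadratic =
        1.0f / (std::pow(accum_new, lr_power) * lr) + 2.0f * l2;
    var[i] = std::fabs(linear[i]) > l1
                 ? (std::copysign(l1, linear[i]) - linear[i]) / quadratic
                 : 0.0f;
    accum[i] = accum_new;
  }
}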


@@ -1,69 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"
namespace tensorflow {
static Status ApplyGradientDescentShapeFn(InferenceContext* c) {
ShapeHandle unused;
ShapeHandle s = ShapeOrHandleShape(c, 0); // var
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); // alpha
TF_RETURN_IF_ERROR(c->Merge(s, c->input(2), &s)); // delta
if (c->num_outputs() > 0) {
c->set_output(0, s);
}
return Status::OK();
}
REGISTER_OP("ApplyGradientDescent")
.Input("var: Ref(T)")
.Input("alpha: T")
.Input("delta: T")
.Output("out: Ref(T)")
.Attr("T: numbertype")
.Attr("use_locking: bool = false")
.SetShapeFn(ApplyGradientDescentShapeFn)
.Doc(R"doc(
Update '*var' by subtracting 'alpha' * 'delta' from it.
var: Should be from a Variable().
alpha: Scaling factor. Must be a scalar.
delta: The change.
out: Same as "var".
use_locking: If `True`, the subtraction will be protected by a lock;
otherwise the behavior is undefined, but may exhibit less contention.
)doc");
REGISTER_OP("ResourceApplyGradientDescent")
.Input("var: resource")
.Input("alpha: T")
.Input("delta: T")
.Attr("T: numbertype")
.Attr("use_locking: bool = false")
.SetShapeFn(ApplyGradientDescentShapeFn)
.Doc(R"doc(
Update '*var' by subtracting 'alpha' * 'delta' from it.
var: Should be from a Variable().
alpha: Scaling factor. Must be a scalar.
delta: The change.
use_locking: If `True`, the subtraction will be protected by a lock;
otherwise the behavior is undefined, but may exhibit less contention.
)doc");
} // namespace tensorflow
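
The update here is simply var -= alpha * delta; as an element-wise C++ sketch, illustrative only:

#include <vector>

// Illustrative dense gradient-descent step: var -= alpha * delta.
void GradientDescentStep(std::vector<float>& var, float alpha,
                         const std::vector<float>& delta) {
  for (std::size_t i = 0; i < var.size(); ++i) {
    var[i] -= alpha * delta[i];
  }
}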


@@ -1,179 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"
namespace tensorflow {
static Status ApplyMomentumShapeFn(InferenceContext* c, bool sparse) {
ShapeHandle unused;
ShapeHandle s = ShapeOrHandleShape(c, 0); // var
TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s)); // accum
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); // lr
TF_RETURN_IF_ERROR(
HandleGradAndIndicesInputs(c, sparse, 3 /* grad_idx */, &s));
int idx = sparse ? 5 : 4;
TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused)); // momentum
if (c->num_outputs() > 0) {
c->set_output(0, s);
}
return Status::OK();
}
REGISTER_OP("ApplyMomentum")
.Input("var: Ref(T)")
.Input("accum: Ref(T)")
.Input("lr: T")
.Input("grad: T")
.Input("momentum: T")
.Output("out: Ref(T)")
.Attr("T: numbertype")
.Attr("use_locking: bool = false")
.Attr("use_nesterov: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyMomentumShapeFn(c, false /* sparse */);
})
.Doc(R"doc(
Update '*var' according to the momentum scheme. Set use_nesterov = True if you
want to use Nesterov momentum.
accum = accum * momentum + grad
var -= lr * accum
var: Should be from a Variable().
accum: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
grad: The gradient.
momentum: Momentum. Must be a scalar.
out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
use_nesterov: If `True`, the tensor passed to compute grad will be
var - lr * momentum * accum, so in the end, the var you get is actually
var - lr * momentum * accum.
)doc");
REGISTER_OP("SparseApplyMomentum")
.Input("var: Ref(T)")
.Input("accum: Ref(T)")
.Input("lr: T")
.Input("grad: T")
.Input("indices: Tindices")
.Input("momentum: T")
.Output("out: Ref(T)")
.Attr("T: numbertype")
.Attr("Tindices: {int32, int64}")
.Attr("use_locking: bool = false")
.Attr("use_nesterov: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyMomentumShapeFn(c, true /* sparse */);
})
.Doc(R"doc(
Update relevant entries in '*var' and '*accum' according to the momentum scheme.
Set use_nesterov = True if you want to use Nesterov momentum.
That is for rows we have grad for, we update var and accum as follows:
accum = accum * momentum + grad
var -= lr * accum
var: Should be from a Variable().
accum: Should be from a Variable().
lr: Learning rate. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
momentum: Momentum. Must be a scalar.
out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
use_nesterov: If `True`, the tensor passed to compute grad will be
var - lr * momentum * accum, so in the end, the var you get is actually
var - lr * momentum * accum.
)doc");
REGISTER_OP("ResourceApplyMomentum")
.Input("var: resource")
.Input("accum: resource")
.Input("lr: T")
.Input("grad: T")
.Input("momentum: T")
.Attr("T: numbertype")
.Attr("use_locking: bool = false")
.Attr("use_nesterov: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyMomentumShapeFn(c, false /* sparse */);
})
.Doc(R"doc(
Update '*var' according to the momentum scheme. Set use_nesterov = True if you
want to use Nesterov momentum.
accum = accum * momentum + grad
var -= lr * accum
var: Should be from a Variable().
accum: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
grad: The gradient.
momentum: Momentum. Must be a scalar.
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
use_nesterov: If `True`, the tensor passed to compute grad will be
var - lr * momentum * accum, so in the end, the var you get is actually
var - lr * momentum * accum.
)doc");
REGISTER_OP("ResourceSparseApplyMomentum")
.Input("var: resource")
.Input("accum: resource")
.Input("lr: T")
.Input("grad: T")
.Input("indices: Tindices")
.Input("momentum: T")
.Attr("T: numbertype")
.Attr("Tindices: {int32, int64}")
.Attr("use_locking: bool = false")
.Attr("use_nesterov: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyMomentumShapeFn(c, true /* sparse */);
})
.Doc(R"doc(
Update relevant entries in '*var' and '*accum' according to the momentum scheme.
Set use_nesterov = True if you want to use Nesterov momentum.
That is for rows we have grad for, we update var and accum as follows:
accum = accum * momentum + grad
var -= lr * accum
var: Should be from a Variable().
accum: Should be from a Variable().
lr: Learning rate. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
momentum: Momentum. Must be a scalar.
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
use_nesterov: If `True`, the tensor passed to compute grad will be
var - lr * momentum * accum, so in the end, the var you get is actually
var - lr * momentum * accum.
)doc");
} // namespace tensorflow
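
An element-wise C++ sketch of the dense momentum step; the use_nesterov branch is an assumption based on the doc string's description of the look-ahead point, not code taken from this change:

#include <vector>

// Illustrative dense momentum step: accum = accum * momentum + grad,
// then move var either by the accumulator or to the Nesterov look-ahead point.
void MomentumStep(std::vector<float>& var, std::vector<float>& accum, float lr,
                  const std::vector<float>& grad, float momentum,
                  bool use_nesterov) {
  for (std::size_t i = 0; i < var.size(); ++i) {
    accum[i] = accum[i] * momentum + grad[i];
    if (use_nesterov) {
      var[i] -= lr * grad[i] + lr * momentum * accum[i];  // assumption
    } else {
      var[i] -= lr * accum[i];
    }
  }
}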


@@ -1,183 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"
namespace tensorflow {
static Status ApplyProximalAdagradShapeFn(InferenceContext* c, bool sparse) {
ShapeHandle unused;
ShapeHandle s = ShapeOrHandleShape(c, 0); // var
TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s)); // accum
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); // lr
TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); // l1
TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); // l2
TF_RETURN_IF_ERROR(
HandleGradAndIndicesInputs(c, sparse, 5 /* grad_idx */, &s));
if (c->num_outputs() > 0) {
c->set_output(0, s);
}
return Status::OK();
}
REGISTER_OP("ApplyProximalAdagrad")
.Input("var: Ref(T)")
.Input("accum: Ref(T)")
.Input("lr: T")
.Input("l1: T")
.Input("l2: T")
.Input("grad: T")
.Output("out: Ref(T)")
.Attr("T: numbertype")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyProximalAdagradShapeFn(c, false /* sparse */);
})
.Doc(R"doc(
Update '*var' and '*accum' according to FOBOS with Adagrad learning rate.
accum += grad * grad
prox_v = var - lr * grad * (1 / sqrt(accum))
var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}
var: Should be from a Variable().
accum: Should be from a Variable().
grad: The gradient.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
out: Same as "var".
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");
REGISTER_OP("ResourceApplyProximalAdagrad")
.Input("var: resource")
.Input("accum: resource")
.Input("lr: T")
.Input("l1: T")
.Input("l2: T")
.Input("grad: T")
.Attr("T: numbertype")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyProximalAdagradShapeFn(c, false /* sparse */);
})
.Doc(R"doc(
Update '*var' and '*accum' according to FOBOS with Adagrad learning rate.
accum += grad * grad
prox_v = var - lr * grad * (1 / sqrt(accum))
var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}
var: Should be from a Variable().
accum: Should be from a Variable().
grad: The gradient.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");
REGISTER_OP("SparseApplyProximalAdagrad")
.Input("var: Ref(T)")
.Input("accum: Ref(T)")
.Input("lr: T")
.Input("l1: T")
.Input("l2: T")
.Input("grad: T")
.Input("indices: Tindices")
.Output("out: Ref(T)")
.Attr("T: numbertype")
.Attr("Tindices: {int32, int64}")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyProximalAdagradShapeFn(c, true /* sparse */);
})
.Doc(R"doc(
Sparse update entries in '*var' and '*accum' according to FOBOS algorithm.
That is for rows we have grad for, we update var and accum as follows:
accum += grad * grad
prox_v = var
prox_v -= lr * grad * (1 / sqrt(accum))
var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}
var: Should be from a Variable().
accum: Should be from a Variable().
lr: Learning rate. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
out: Same as "var".
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");
REGISTER_OP("ResourceSparseApplyProximalAdagrad")
.Input("var: resource")
.Input("accum: resource")
.Input("lr: T")
.Input("l1: T")
.Input("l2: T")
.Input("grad: T")
.Input("indices: Tindices")
.Attr("T: numbertype")
.Attr("Tindices: {int32, int64}")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyProximalAdagradShapeFn(c, true /* sparse */);
})
.Doc(R"doc(
Sparse update entries in '*var' and '*accum' according to FOBOS algorithm.
That is for rows we have grad for, we update var and accum as follows:
accum += grad * grad
prox_v = var
prox_v -= lr * grad * (1 / sqrt(accum))
var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}
var: Should be from a Variable().
accum: Should be from a Variable().
lr: Learning rate. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");
static Status ApplyFtrlShapeFn(InferenceContext* c, bool sparse) {
ShapeHandle unused;
ShapeHandle s = ShapeOrHandleShape(c, 0); // var
TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s)); // accum
TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 2), &s)); // linear
TF_RETURN_IF_ERROR(
HandleGradAndIndicesInputs(c, sparse, 3 /* grad_idx */, &s));
int idx = sparse ? 5 : 4;
TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused)); // lr
TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused)); // l1
TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused)); // l2
TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused)); // lr_power
if (c->num_outputs() > 0) {
c->set_output(0, s);
}
return Status::OK();
}
} // namespace tensorflow
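
An element-wise C++ sketch of the dense FOBOS-with-Adagrad step documented above, illustrative only:

#include <algorithm>
#include <cmath>
#include <vector>

// Illustrative dense proximal-Adagrad step: Adagrad update followed by the
// soft-threshold/shrink step from the doc string above.
void ProximalAdagradStep(std::vector<float>& var, std::vector<float>& accum,
                         float lr, float l1, float l2,
                         const std::vector<float>& grad) {
  for (std::size_t i = 0; i < var.size(); ++i) {
    accum[i] += grad[i] * grad[i];
    const float prox_v = var[i] - lr * grad[i] / std::sqrt(accum[i]);
    var[i] = std::copysign(1.0f, prox_v) / (1.0f + lr * l2) *
             std::max(std::fabs(prox_v) - lr * l1, 0.0f);
  }
}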


@@ -1,151 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"
namespace tensorflow {
static Status ApplyProximalGradientDescentShapeFn(InferenceContext* c,
bool sparse) {
ShapeHandle unused;
ShapeHandle s = ShapeOrHandleShape(c, 0); // var
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); // alpha
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); // l1
TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); // l2
TF_RETURN_IF_ERROR(
HandleGradAndIndicesInputs(c, sparse, 4 /* grad_idx */, &s));
if (c->num_outputs() > 0) {
c->set_output(0, s);
}
return Status::OK();
}
REGISTER_OP("ApplyProximalGradientDescent")
.Input("var: Ref(T)")
.Input("alpha: T")
.Input("l1: T")
.Input("l2: T")
.Input("delta: T")
.Output("out: Ref(T)")
.Attr("T: numbertype")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyProximalGradientDescentShapeFn(c, false /* sparse */);
})
.Doc(R"doc(
Update '*var' as FOBOS algorithm with fixed learning rate.
prox_v = var - alpha * delta
var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}
var: Should be from a Variable().
alpha: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
delta: The change.
out: Same as "var".
use_locking: If True, the subtraction will be protected by a lock;
otherwise the behavior is undefined, but may exhibit less contention.
)doc");
REGISTER_OP("SparseApplyProximalGradientDescent")
.Input("var: Ref(T)")
.Input("alpha: T")
.Input("l1: T")
.Input("l2: T")
.Input("grad: T")
.Input("indices: Tindices")
.Output("out: Ref(T)")
.Attr("T: numbertype")
.Attr("Tindices: {int32, int64}")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyProximalGradientDescentShapeFn(c, true /* sparse */);
})
.Doc(R"doc(
Sparse update '*var' as FOBOS algorithm with fixed learning rate.
That is for rows we have grad for, we update var as follows:
prox_v = var - alpha * grad
var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}
var: Should be from a Variable().
alpha: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
out: Same as "var".
use_locking: If True, the subtraction will be protected by a lock;
otherwise the behavior is undefined, but may exhibit less contention.
)doc");
REGISTER_OP("ResourceApplyProximalGradientDescent")
.Input("var: resource")
.Input("alpha: T")
.Input("l1: T")
.Input("l2: T")
.Input("delta: T")
.Attr("T: numbertype")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyProximalGradientDescentShapeFn(c, false /* sparse */);
})
.Doc(R"doc(
Update '*var' as FOBOS algorithm with fixed learning rate.
prox_v = var - alpha * delta
var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}
var: Should be from a Variable().
alpha: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
delta: The change.
use_locking: If True, the subtraction will be protected by a lock;
otherwise the behavior is undefined, but may exhibit less contention.
)doc");
REGISTER_OP("ResourceSparseApplyProximalGradientDescent")
.Input("var: resource")
.Input("alpha: T")
.Input("l1: T")
.Input("l2: T")
.Input("grad: T")
.Input("indices: Tindices")
.Attr("T: numbertype")
.Attr("Tindices: {int32, int64}")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyProximalGradientDescentShapeFn(c, true /* sparse */);
})
.Doc(R"doc(
Sparse update '*var' as FOBOS algorithm with fixed learning rate.
That is for rows we have grad for, we update var as follows:
prox_v = var - alpha * grad
var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}
var: Should be from a Variable().
alpha: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
use_locking: If True, the subtraction will be protected by a lock;
otherwise the behavior is undefined, but may exhibit less contention.
)doc");
} // namespace tensorflow
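
An element-wise C++ sketch of the fixed-learning-rate FOBOS step documented above, illustrative only:

#include <algorithm>
#include <cmath>
#include <vector>

// Illustrative dense proximal gradient-descent step: take the plain
// gradient-descent result and shrink it toward zero per the doc string above.
void ProximalGradientDescentStep(std::vector<float>& var, float alpha,
                                 float l1, float l2,
                                 const std::vector<float>& delta) {
  for (std::size_t i = 0; i < var.size(); ++i) {
    const float prox_v = var[i] - alpha * delta[i];
    var[i] = std::copysign(1.0f, prox_v) / (1.0f + alpha * l2) *
             std::max(std::fabs(prox_v) - alpha * l1, 0.0f);
  }
}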


@@ -1,425 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"
namespace tensorflow {
static Status ApplyRMSPropShapeFn(InferenceContext* c, bool sparse) {
ShapeHandle unused;
ShapeHandle s = ShapeOrHandleShape(c, 0); // var
TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s)); // ms
TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 2), &s)); // mom
TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); // lr
TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); // rho
TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused)); // momentum
TF_RETURN_IF_ERROR(c->WithRank(c->input(6), 0, &unused)); // epsilon
TF_RETURN_IF_ERROR(
HandleGradAndIndicesInputs(c, sparse, 7 /* grad_idx */, &s));
if (c->num_outputs() > 0) {
c->set_output(0, s);
}
return Status::OK();
}
static Status ApplyCenteredRMSPropShapeFn(InferenceContext* c, bool sparse) {
ShapeHandle unused;
ShapeHandle s = ShapeOrHandleShape(c, 0); // var
TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s)); // ms
TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 2), &s)); // mg
TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 3), &s)); // mom
TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); // lr
TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused)); // rho
TF_RETURN_IF_ERROR(c->WithRank(c->input(6), 0, &unused)); // momentum
TF_RETURN_IF_ERROR(c->WithRank(c->input(7), 0, &unused)); // epsilon
TF_RETURN_IF_ERROR(
HandleGradAndIndicesInputs(c, sparse, 8 /* grad_idx */, &s));
if (c->num_outputs() > 0) {
c->set_output(0, s);
}
return Status::OK();
}
REGISTER_OP("ApplyRMSProp")
.Input("var: Ref(T)")
.Input("ms: Ref(T)")
.Input("mom: Ref(T)")
.Input("lr: T")
.Input("rho: T")
.Input("momentum: T")
.Input("epsilon: T")
.Input("grad: T")
.Output("out: Ref(T)")
.Attr("T: numbertype")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyRMSPropShapeFn(c, false /* sparse */);
})
.Doc(R"doc(
Update '*var' according to the RMSProp algorithm.
Note that in the dense implementation of this algorithm, ms and mom will
update even if the grad is zero, but in the sparse implementation, ms
and mom will not update in iterations during which the grad is zero.
mean_square = decay * mean_square + (1-decay) * gradient ** 2
Delta = learning_rate * gradient / sqrt(mean_square + epsilon)
ms <- rho * ms_{t-1} + (1-rho) * grad * grad
mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
var <- var - mom
var: Should be from a Variable().
ms: Should be from a Variable().
mom: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
rho: Decay rate. Must be a scalar.
grad: The gradient.
out: Same as "var".
use_locking: If `True`, updating of the var, ms, and mom tensors is protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");
REGISTER_OP("ApplyCenteredRMSProp")
.Input("var: Ref(T)")
.Input("mg: Ref(T)")
.Input("ms: Ref(T)")
.Input("mom: Ref(T)")
.Input("lr: T")
.Input("rho: T")
.Input("momentum: T")
.Input("epsilon: T")
.Input("grad: T")
.Output("out: Ref(T)")
.Attr("T: numbertype")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyCenteredRMSPropShapeFn(c, false /* sparse */);
})
.Doc(R"doc(
Update '*var' according to the centered RMSProp algorithm.
The centered RMSProp algorithm uses an estimate of the centered second moment
(i.e., the variance) for normalization, as opposed to regular RMSProp, which
uses the (uncentered) second moment. This often helps with training, but is
slightly more expensive in terms of computation and memory.
Note that in the dense implementation of this algorithm, mg, ms, and mom will
update even if the grad is zero, but in the sparse implementation, mg, ms,
and mom will not update in iterations during which the grad is zero.
mean_square = decay * mean_square + (1-decay) * gradient ** 2
mean_grad = decay * mean_grad + (1-decay) * gradient
Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)
mg <- rho * mg_{t-1} + (1-rho) * grad
ms <- rho * ms_{t-1} + (1-rho) * grad * grad
mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)
var <- var - mom
var: Should be from a Variable().
mg: Should be from a Variable().
ms: Should be from a Variable().
mom: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
rho: Decay rate. Must be a scalar.
grad: The gradient.
out: Same as "var".
use_locking: If `True`, updating of the var, mg, ms, and mom tensors is
protected by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");
REGISTER_OP("SparseApplyRMSProp")
.Input("var: Ref(T)")
.Input("ms: Ref(T)")
.Input("mom: Ref(T)")
.Input("lr: T")
.Input("rho: T")
.Input("momentum: T")
.Input("epsilon: T")
.Input("grad: T")
.Input("indices: Tindices")
.Output("out: Ref(T)")
.Attr("T: numbertype")
.Attr("Tindices: {int32, int64}")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyRMSPropShapeFn(c, true /* sparse */);
})
.Doc(R"doc(
Update '*var' according to the RMSProp algorithm.
Note that in the dense implementation of this algorithm, ms and mom will
update even if the grad is zero, but in this sparse implementation, ms
and mom will not update in iterations during which the grad is zero.
mean_square = decay * mean_square + (1-decay) * gradient ** 2
Delta = learning_rate * gradient / sqrt(mean_square + epsilon)
ms <- rho * ms_{t-1} + (1-rho) * grad * grad
mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
var <- var - mom
var: Should be from a Variable().
ms: Should be from a Variable().
mom: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
rho: Decay rate. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var, ms and mom.
out: Same as "var".
use_locking: If `True`, updating of the var, ms, and mom tensors is protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");
REGISTER_OP("SparseApplyCenteredRMSProp")
.Input("var: Ref(T)")
.Input("mg: Ref(T)")
.Input("ms: Ref(T)")
.Input("mom: Ref(T)")
.Input("lr: T")
.Input("rho: T")
.Input("momentum: T")
.Input("epsilon: T")
.Input("grad: T")
.Input("indices: Tindices")
.Output("out: Ref(T)")
.Attr("T: numbertype")
.Attr("Tindices: {int32, int64}")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyCenteredRMSPropShapeFn(c, true /* sparse */);
})
.Doc(R"doc(
Update '*var' according to the centered RMSProp algorithm.
The centered RMSProp algorithm uses an estimate of the centered second moment
(i.e., the variance) for normalization, as opposed to regular RMSProp, which
uses the (uncentered) second moment. This often helps with training, but is
slightly more expensive in terms of computation and memory.
Note that in the dense implementation of this algorithm, mg, ms, and mom will
update even if the grad is zero, but in this sparse implementation, mg, ms,
and mom will not update in iterations during which the grad is zero.
mean_square = decay * mean_square + (1-decay) * gradient ** 2
mean_grad = decay * mean_grad + (1-decay) * gradient
Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)
mg <- rho * mg_{t-1} + (1-rho) * grad
ms <- rho * ms_{t-1} + (1-rho) * grad * grad
mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)
var <- var - mom
var: Should be from a Variable().
mg: Should be from a Variable().
ms: Should be from a Variable().
mom: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
rho: Decay rate. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var, ms and mom.
out: Same as "var".
use_locking: If `True`, updating of the var, mg, ms, and mom tensors is
protected by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");
REGISTER_OP("ResourceApplyRMSProp")
.Input("var: resource")
.Input("ms: resource")
.Input("mom: resource")
.Input("lr: T")
.Input("rho: T")
.Input("momentum: T")
.Input("epsilon: T")
.Input("grad: T")
.Attr("T: numbertype")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyRMSPropShapeFn(c, false /* sparse */);
})
.Doc(R"doc(
Update '*var' according to the RMSProp algorithm.
Note that in the dense implementation of this algorithm, ms and mom will
update even if the grad is zero, but in the sparse implementation, ms
and mom will not update in iterations during which the grad is zero.
mean_square = decay * mean_square + (1-decay) * gradient ** 2
Delta = learning_rate * gradient / sqrt(mean_square + epsilon)
ms <- rho * ms_{t-1} + (1-rho) * grad * grad
mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
var <- var - mom
var: Should be from a Variable().
ms: Should be from a Variable().
mom: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
rho: Decay rate. Must be a scalar.
grad: The gradient.
use_locking: If `True`, updating of the var, ms, and mom tensors is protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");
REGISTER_OP("ResourceApplyCenteredRMSProp")
.Input("var: resource")
.Input("mg: resource")
.Input("ms: resource")
.Input("mom: resource")
.Input("lr: T")
.Input("rho: T")
.Input("momentum: T")
.Input("epsilon: T")
.Input("grad: T")
.Attr("T: numbertype")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyCenteredRMSPropShapeFn(c, false /* sparse */);
})
.Doc(R"doc(
Update '*var' according to the centered RMSProp algorithm.
The centered RMSProp algorithm uses an estimate of the centered second moment
(i.e., the variance) for normalization, as opposed to regular RMSProp, which
uses the (uncentered) second moment. This often helps with training, but is
slightly more expensive in terms of computation and memory.
Note that in the dense implementation of this algorithm, mg, ms, and mom will
update even if the grad is zero, but in the sparse implementation, mg, ms,
and mom will not update in iterations during which the grad is zero.
mean_square = decay * mean_square + (1-decay) * gradient ** 2
mean_grad = decay * mean_grad + (1-decay) * gradient
Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)
mg <- rho * mg_{t-1} + (1-rho) * grad
ms <- rho * ms_{t-1} + (1-rho) * grad * grad
mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)
var <- var - mom
var: Should be from a Variable().
mg: Should be from a Variable().
ms: Should be from a Variable().
mom: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
rho: Decay rate. Must be a scalar.
grad: The gradient.
use_locking: If `True`, updating of the var, mg, ms, and mom tensors is
protected by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");
REGISTER_OP("ResourceSparseApplyRMSProp")
.Input("var: resource")
.Input("ms: resource")
.Input("mom: resource")
.Input("lr: T")
.Input("rho: T")
.Input("momentum: T")
.Input("epsilon: T")
.Input("grad: T")
.Input("indices: Tindices")
.Attr("T: numbertype")
.Attr("Tindices: {int32, int64}")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyRMSPropShapeFn(c, true /* sparse */);
})
.Doc(R"doc(
Update '*var' according to the RMSProp algorithm.
Note that in the dense implementation of this algorithm, ms and mom will
update even if the grad is zero, but in this sparse implementation, ms
and mom will not update in iterations during which the grad is zero.
mean_square = decay * mean_square + (1-decay) * gradient ** 2
Delta = learning_rate * gradient / sqrt(mean_square + epsilon)
ms <- rho * ms_{t-1} + (1-rho) * grad * grad
mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
var <- var - mom
var: Should be from a Variable().
ms: Should be from a Variable().
mom: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
rho: Decay rate. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var, ms and mom.
use_locking: If `True`, updating of the var, ms, and mom tensors is protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");
REGISTER_OP("ResourceSparseApplyCenteredRMSProp")
.Input("var: resource")
.Input("mg: resource")
.Input("ms: resource")
.Input("mom: resource")
.Input("lr: T")
.Input("rho: T")
.Input("momentum: T")
.Input("epsilon: T")
.Input("grad: T")
.Input("indices: Tindices")
.Attr("T: numbertype")
.Attr("Tindices: {int32, int64}")
.Attr("use_locking: bool = false")
.SetShapeFn([](InferenceContext* c) {
return ApplyCenteredRMSPropShapeFn(c, true /* sparse */);
})
.Doc(R"doc(
Update '*var' according to the centered RMSProp algorithm.
The centered RMSProp algorithm uses an estimate of the centered second moment
(i.e., the variance) for normalization, as opposed to regular RMSProp, which
uses the (uncentered) second moment. This often helps with training, but is
slightly more expensive in terms of computation and memory.
Note that in the dense implementation of this algorithm, mg, ms, and mom will
update even if the grad is zero, but in this sparse implementation, mg, ms,
and mom will not update in iterations during which the grad is zero.
mean_square = decay * mean_square + (1-decay) * gradient ** 2
mean_grad = decay * mean_grad + (1-decay) * gradient
Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)
mg <- rho * mg_{t-1} + (1-rho) * grad
ms <- rho * ms_{t-1} + (1-rho) * grad * grad
mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)
var <- var - mom
var: Should be from a Variable().
mg: Should be from a Variable().
ms: Should be from a Variable().
mom: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
rho: Decay rate. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var, ms and mom.
use_locking: If `True`, updating of the var, mg, ms, and mom tensors is
protected by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");
} // namespace tensorflow

File diff suppressed because it is too large.


@ -1,64 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef THIRD_PARTY_TENSORFLOW_CORE_OPS_TRAINING_OPS_H_
#define THIRD_PARTY_TENSORFLOW_CORE_OPS_TRAINING_OPS_H_
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"
namespace tensorflow {
using shape_inference::DimensionHandle;
using shape_inference::InferenceContext;
using shape_inference::ShapeHandle;
static ShapeHandle ShapeOrHandleShape(InferenceContext* c, int input) {
auto* handle_data = c->input_handle_shapes_and_types(input);
if (handle_data != nullptr && !handle_data->empty() &&
(*handle_data)[0].dtype != DT_INVALID) {
return (*handle_data)[0].shape;
}
return c->input(input);
}
// Handle the gradient and, if <sparse>, indices inputs.
// <s> is an input+output parameter, containing the current known input shape to
// the gradient.
static Status HandleGradAndIndicesInputs(InferenceContext* c, bool sparse,
int grad_idx, ShapeHandle* s) {
ShapeHandle grad = ShapeOrHandleShape(c, grad_idx);
if (!sparse) {
TF_RETURN_IF_ERROR(c->Merge(*s, grad, s));
return Status::OK();
}
// Indices is a vector where indices.dim[0].rank == grad[0].rank.
ShapeHandle indices;
TF_RETURN_IF_ERROR(c->WithRank(c->input(grad_idx + 1), 1, &indices));
DimensionHandle unused;
TF_RETURN_IF_ERROR(c->Merge(c->Dim(indices, 0), c->Dim(grad, 0), &unused));
// Trailing part of grad matches trailing part of *s.
ShapeHandle grad_unknown_first;
TF_RETURN_IF_ERROR(
c->ReplaceDim(grad, 0, c->UnknownDim(), &grad_unknown_first));
TF_RETURN_IF_ERROR(c->Merge(*s, grad_unknown_first, s));
return Status::OK();
}
} // namespace tensorflow
#endif // THIRD_PARTY_TENSORFLOW_CORE_OPS_TRAINING_OPS_H_
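A short NumPy sketch (hypothetical shapes) of the contract HandleGradAndIndicesInputs enforces for the sparse ops: indices is rank-1, its length matches grad's first dimension, and grad's trailing dimensions match var's:

import numpy as np

var = np.zeros((10, 3))           # var
grad = np.zeros((4, 3))           # gradient rows for 4 of var's rows
indices = np.array([0, 2, 5, 7])  # which rows of var those gradient rows belong to

assert indices.ndim == 1
assert indices.shape[0] == grad.shape[0]
assert grad.shape[1:] == var.shape[1:]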


@ -65,16 +65,7 @@ tf_java_op_gen_srcjar(
"sparse_ops",
"state_ops",
"string_ops",
"adadelta_ops",
"adagrad_da_ops",
"adagrad_ops",
"adam_ops",
"ftrl_ops",
"momentum_ops",
"gradient_descent_ops",
"proximal_adagrad_ops",
"proximal_gradient_descent_ops",
"rms_prop_ops",
"training_ops",
"user_ops",
],
)


@ -1363,53 +1363,8 @@ tf_gen_op_wrapper_private_py(
)
tf_gen_op_wrapper_private_py(
name = "adagrad_ops_gen",
out = "training/gen_adagrad_ops.py",
)
tf_gen_op_wrapper_private_py(
name = "adagrad_da_ops_gen",
out = "training/gen_adagrad_da_ops.py",
)
tf_gen_op_wrapper_private_py(
name = "adadelta_ops_gen",
out = "training/gen_adadelta_ops.py",
)
tf_gen_op_wrapper_private_py(
name = "adam_ops_gen",
out = "training/gen_adam_ops.py",
)
tf_gen_op_wrapper_private_py(
name = "ftrl_ops_gen",
out = "training/gen_ftrl_ops.py",
)
tf_gen_op_wrapper_private_py(
name = "gradient_descent_ops_gen",
out = "training/gen_gradient_descent_ops.py",
)
tf_gen_op_wrapper_private_py(
name = "momentum_ops_gen",
out = "training/gen_momentum_ops.py",
)
tf_gen_op_wrapper_private_py(
name = "proximal_adagrad_ops_gen",
out = "training/gen_proximal_adagrad_ops.py",
)
tf_gen_op_wrapper_private_py(
name = "proximal_gradient_descent_ops_gen",
out = "training/gen_proximal_gradient_descent_ops.py",
)
tf_gen_op_wrapper_private_py(
name = "rms_prop_ops_gen",
out = "training/gen_rms_prop_ops.py",
name = "training_ops_gen",
out = "training/gen_training_ops.py",
)
py_library(
@ -2640,10 +2595,6 @@ py_library(
),
srcs_version = "PY2AND3",
deps = [
":adadelta_ops_gen",
":adagrad_da_ops_gen",
":adagrad_ops_gen",
":adam_ops_gen",
":array_ops",
":checkpoint_ops_gen",
":client",
@ -2652,8 +2603,6 @@ py_library(
":errors",
":framework",
":framework_for_generated_wrappers",
":ftrl_ops_gen",
":gradient_descent_ops_gen",
":gradients",
":init_ops",
":io_ops",
@ -2661,21 +2610,18 @@ py_library(
":lib",
":lookup_ops",
":math_ops",
":momentum_ops_gen",
":platform",
":protos_all_py",
":proximal_adagrad_ops_gen",
":proximal_gradient_descent_ops_gen",
":pywrap_tensorflow",
":random_ops",
":resource_variable_ops",
":resources",
":rms_prop_ops_gen",
":sdca_ops",
":sparse_ops",
":state_ops",
":string_ops",
":summary",
":training_ops_gen",
":util",
":variable_scope",
":variables",


@ -19,16 +19,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.python.training import gen_training_ops
# go/tf-wildcard-import
# pylint: disable=wildcard-import
from tensorflow.python.training.gen_adadelta_ops import *
from tensorflow.python.training.gen_adagrad_da_ops import *
from tensorflow.python.training.gen_adagrad_ops import *
from tensorflow.python.training.gen_adam_ops import *
from tensorflow.python.training.gen_ftrl_ops import *
from tensorflow.python.training.gen_gradient_descent_ops import *
from tensorflow.python.training.gen_momentum_ops import *
from tensorflow.python.training.gen_proximal_adagrad_ops import *
from tensorflow.python.training.gen_proximal_gradient_descent_ops import *
from tensorflow.python.training.gen_rms_prop_ops import *
from tensorflow.python.training.gen_training_ops import *
# pylint: enable=wildcard-import
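After this rollback the module re-exports everything from the single gen_training_ops wrapper; a hedged TF 1.x sketch of calling one of the re-exported wrappers directly (the variable, values, and choice of op are illustrative):

import tensorflow as tf
from tensorflow.python.training import training_ops

var = tf.Variable([1.0, 2.0])
update = training_ops.apply_gradient_descent(
    var, alpha=0.1, delta=tf.constant([0.5, 0.5]), use_locking=False)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(update)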


@ -196,7 +196,7 @@ def tf_opts_nortti_if_android():
# Given a list of "op_lib_names" (a list of files in the ops directory
# without their .cc extensions), generate a library for that file.
def tf_gen_op_libs(op_lib_names, deps=None, extra_srcs=[]):
def tf_gen_op_libs(op_lib_names, deps=None):
# Make library out of each op so it can also be used to generate wrappers
# for various languages.
if not deps:
@ -205,7 +205,7 @@ def tf_gen_op_libs(op_lib_names, deps=None, extra_srcs=[]):
native.cc_library(
name=n + "_op_lib",
copts=tf_copts(),
srcs=extra_srcs + ["ops/" + n + ".cc"],
srcs=["ops/" + n + ".cc"],
deps=deps + [clean_dep("//tensorflow/core:framework")],
visibility=["//visibility:public"],
alwayslink=1,