Automated g4 rollback of changelist 168576795
PiperOrigin-RevId: 168606478
parent 78f1dd5031
commit f95b1cf115
@@ -416,16 +416,7 @@ tf_gen_op_wrappers_cc(
        "sparse_ops",
        "state_ops",
        "string_ops",
        "adadelta_ops",
        "adagrad_da_ops",
        "adagrad_ops",
        "adam_ops",
        "ftrl_ops",
        "momentum_ops",
        "gradient_descent_ops",
        "proximal_adagrad_ops",
        "proximal_gradient_descent_ops",
        "rms_prop_ops",
        "training_ops",
        "user_ops",
    ],
    other_hdrs = [
@@ -16,34 +16,25 @@ limitations under the License.
#ifndef THIRD_PARTY_TENSORFLOW_CC_OPS_STANDARD_OPS_H_
#define THIRD_PARTY_TENSORFLOW_CC_OPS_STANDARD_OPS_H_

#include "tensorflow/cc/ops/adadelta_ops.h"
#include "tensorflow/cc/ops/adagrad_da_ops.h"
#include "tensorflow/cc/ops/adagrad_ops.h"
#include "tensorflow/cc/ops/adam_ops.h"
#include "tensorflow/cc/ops/array_ops.h"
#include "tensorflow/cc/ops/candidate_sampling_ops.h"
#include "tensorflow/cc/ops/const_op.h"
#include "tensorflow/cc/ops/control_flow_ops.h"
#include "tensorflow/cc/ops/data_flow_ops.h"
#include "tensorflow/cc/ops/ftrl_ops.h"
#include "tensorflow/cc/ops/gradient_descent_ops.h"
#include "tensorflow/cc/ops/image_ops.h"
#include "tensorflow/cc/ops/io_ops.h"
#include "tensorflow/cc/ops/linalg_ops.h"
#include "tensorflow/cc/ops/logging_ops.h"
#include "tensorflow/cc/ops/lookup_ops.h"
#include "tensorflow/cc/ops/math_ops.h"
#include "tensorflow/cc/ops/momentum_ops.h"
#include "tensorflow/cc/ops/nn_ops.h"
#include "tensorflow/cc/ops/no_op.h"
#include "tensorflow/cc/ops/parsing_ops.h"
#include "tensorflow/cc/ops/proximal_adagrad_ops.h"
#include "tensorflow/cc/ops/proximal_gradient_descent_ops.h"
#include "tensorflow/cc/ops/random_ops.h"
#include "tensorflow/cc/ops/rms_prop_ops.h"
#include "tensorflow/cc/ops/sparse_ops.h"
#include "tensorflow/cc/ops/state_ops.h"
#include "tensorflow/cc/ops/string_ops.h"
#include "tensorflow/cc/ops/training_ops.h"
#include "tensorflow/cc/ops/user_ops.h"

#endif // THIRD_PARTY_TENSORFLOW_CC_OPS_STANDARD_OPS_H_
@@ -535,7 +535,6 @@ cc_library(

# Generates library per group of ops.
tf_gen_op_libs(
    extra_srcs = ["ops/training_ops.h"],
    op_lib_names = [
        "bitwise_ops",
        "candidate_sampling_ops",
@@ -568,16 +567,7 @@ tf_gen_op_libs(
        "stateless_random_ops",
        "string_ops",
        "summary_ops",
        "adadelta_ops",
        "adagrad_da_ops",
        "adagrad_ops",
        "adam_ops",
        "ftrl_ops",
        "momentum_ops",
        "gradient_descent_ops",
        "proximal_adagrad_ops",
        "proximal_gradient_descent_ops",
        "rms_prop_ops",
        "training_ops",
    ],
)
@@ -655,16 +645,7 @@ cc_library(
        ":state_ops_op_lib",
        ":stateless_random_ops_op_lib",
        ":string_ops_op_lib",
        ":adadelta_ops_op_lib",
        ":adagrad_da_ops_op_lib",
        ":adagrad_ops_op_lib",
        ":adam_ops_op_lib",
        ":ftrl_ops_op_lib",
        ":momentum_ops_op_lib",
        ":gradient_descent_ops_op_lib",
        ":proximal_adagrad_ops_op_lib",
        ":proximal_gradient_descent_ops_op_lib",
        ":rms_prop_ops_op_lib",
        ":training_ops_op_lib",
        ":user_ops_op_lib",
        ":word2vec_ops",
    ] + tf_additional_cloud_op_deps(),
@@ -3888,18 +3888,9 @@ tf_kernel_library(
        ":bounds_check",
        ":training_op_helpers",
        ":variable_ops",
        "//tensorflow/core:adadelta_ops_op_lib",
        "//tensorflow/core:adagrad_da_ops_op_lib",
        "//tensorflow/core:adagrad_ops_op_lib",
        "//tensorflow/core:adam_ops_op_lib",
        "//tensorflow/core:framework",
        "//tensorflow/core:ftrl_ops_op_lib",
        "//tensorflow/core:gradient_descent_ops_op_lib",
        "//tensorflow/core:lib",
        "//tensorflow/core:momentum_ops_op_lib",
        "//tensorflow/core:proximal_adagrad_ops_op_lib",
        "//tensorflow/core:proximal_gradient_descent_ops_op_lib",
        "//tensorflow/core:rms_prop_ops_op_lib",
        "//tensorflow/core:training_ops_op_lib",
        "//third_party/eigen3",
    ],
)
@@ -1,163 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"

namespace tensorflow {

static Status ApplyAdadeltaShapeFn(InferenceContext* c, bool sparse) {
  ShapeHandle unused;
  ShapeHandle s = ShapeOrHandleShape(c, 0);  // var
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s));  // accum
  TF_RETURN_IF_ERROR(
      c->Merge(s, ShapeOrHandleShape(c, 2), &s));  // accum update
  TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));  // lr
  TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));  // rho
  TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));  // epsilon
  TF_RETURN_IF_ERROR(
      HandleGradAndIndicesInputs(c, sparse, 6 /* grad_idx */, &s));
  if (c->num_outputs() > 0) {
    c->set_output(0, s);
  }
  return Status::OK();
}

REGISTER_OP("ApplyAdadelta")
    .Input("var: Ref(T)")
    .Input("accum: Ref(T)")
    .Input("accum_update: Ref(T)")
    .Input("lr: T")
    .Input("rho: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdadeltaShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the adadelta scheme.

accum = rho() * accum + (1 - rho()) * grad.square();
update = (update_accum + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad;
update_accum = rho() * update_accum + (1 - rho()) * update.square();
var -= update;

var: Should be from a Variable().
accum: Should be from a Variable().
accum_update: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
rho: Decay factor. Must be a scalar.
epsilon: Constant factor. Must be a scalar.
grad: The gradient.
out: Same as "var".
use_locking: If True, updating of the var, accum and update_accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");

REGISTER_OP("SparseApplyAdadelta")
    .Input("var: Ref(T)")
    .Input("accum: Ref(T)")
    .Input("accum_update: Ref(T)")
    .Input("lr: T")
    .Input("rho: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdadeltaShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
var: Should be from a Variable().
accum: Should be from a Variable().
accum_update: Should be from a Variable().
lr: Learning rate. Must be a scalar.
rho: Decay factor. Must be a scalar.
epsilon: Constant factor. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
out: Same as "var".
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");

REGISTER_OP("ResourceApplyAdadelta")
    .Input("var: resource")
    .Input("accum: resource")
    .Input("accum_update: resource")
    .Input("lr: T")
    .Input("rho: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdadeltaShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the adadelta scheme.

accum = rho() * accum + (1 - rho()) * grad.square();
update = (update_accum + epsilon).sqrt() * (accum + epsilon()).rsqrt() * grad;
update_accum = rho() * update_accum + (1 - rho()) * update.square();
var -= update;

var: Should be from a Variable().
accum: Should be from a Variable().
accum_update: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
rho: Decay factor. Must be a scalar.
epsilon: Constant factor. Must be a scalar.
grad: The gradient.
use_locking: If True, updating of the var, accum and update_accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");

REGISTER_OP("ResourceSparseApplyAdadelta")
    .Input("var: resource")
    .Input("accum: resource")
    .Input("accum_update: resource")
    .Input("lr: T")
    .Input("rho: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdadeltaShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
var: Should be from a Variable().
accum: Should be from a Variable().
accum_update: Should be from a Variable().
lr: Learning rate. Must be a scalar.
rho: Decay factor. Must be a scalar.
epsilon: Constant factor. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");

} // namespace tensorflow
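Aside (editorial note, not part of the rollback diff): the Adadelta doc string above can be read as the following minimal, self-contained C++ sketch of one update step for a single scalar parameter. The function name and the scalar simplification are mine; the real kernels operate on whole tensors and live elsewhere in the tree.

#include <cmath>

// Scalar sketch of the quoted equations:
//   accum        = rho * accum + (1 - rho) * grad^2
//   update       = sqrt(update_accum + epsilon) * rsqrt(accum + epsilon) * grad
//   update_accum = rho * update_accum + (1 - rho) * update^2
//   var         -= update
void AdadeltaStep(float* var, float* accum, float* update_accum, float rho,
                  float epsilon, float grad) {
  *accum = rho * *accum + (1.0f - rho) * grad * grad;
  float update =
      std::sqrt(*update_accum + epsilon) / std::sqrt(*accum + epsilon) * grad;
  *update_accum = rho * *update_accum + (1.0f - rho) * update * update;
  *var -= update;
}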
@@ -1,168 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"

namespace tensorflow {

static Status ApplyAdagradDAShapeFn(InferenceContext* c, bool sparse) {
  ShapeHandle unused;
  ShapeHandle s = ShapeOrHandleShape(c, 0);  // var
  TF_RETURN_IF_ERROR(
      c->Merge(s, ShapeOrHandleShape(c, 1), &s));  // grad_accumulator
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 2),
                              &s));  // gradient_squared_accumulator
  TF_RETURN_IF_ERROR(
      HandleGradAndIndicesInputs(c, sparse, 3 /* grad_idx */, &s));
  int idx = sparse ? 5 : 4;
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // lr
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // l1
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // l2
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // global step
  if (c->num_outputs() > 0) {
    c->set_output(0, s);
  }
  return Status::OK();
}

REGISTER_OP("ApplyAdagradDA")
    .Input("var: Ref(T)")
    .Input("gradient_accumulator: Ref(T)")
    .Input("gradient_squared_accumulator: Ref(T)")
    .Input("grad: T")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("global_step: int64")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdagradDAShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the proximal adagrad scheme.

var: Should be from a Variable().
gradient_accumulator: Should be from a Variable().
gradient_squared_accumulator: Should be from a Variable().
grad: The gradient.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
global_step: Training step number. Must be a scalar.
out: Same as "var".
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");

REGISTER_OP("SparseApplyAdagradDA")
    .Input("var: Ref(T)")
    .Input("gradient_accumulator: Ref(T)")
    .Input("gradient_squared_accumulator: Ref(T)")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("global_step: int64")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdagradDAShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update entries in '*var' and '*accum' according to the proximal adagrad scheme.

var: Should be from a Variable().
gradient_accumulator: Should be from a Variable().
gradient_squared_accumulator: Should be from a Variable().
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
lr: Learning rate. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
global_step: Training step number. Must be a scalar.
out: Same as "var".
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");

REGISTER_OP("ResourceApplyAdagradDA")
    .Input("var: resource")
    .Input("gradient_accumulator: resource")
    .Input("gradient_squared_accumulator: resource")
    .Input("grad: T")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("global_step: int64")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdagradDAShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the proximal adagrad scheme.

var: Should be from a Variable().
gradient_accumulator: Should be from a Variable().
gradient_squared_accumulator: Should be from a Variable().
grad: The gradient.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
global_step: Training step number. Must be a scalar.
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");

REGISTER_OP("ResourceSparseApplyAdagradDA")
    .Input("var: resource")
    .Input("gradient_accumulator: resource")
    .Input("gradient_squared_accumulator: resource")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("global_step: int64")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdagradDAShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update entries in '*var' and '*accum' according to the proximal adagrad scheme.

var: Should be from a Variable().
gradient_accumulator: Should be from a Variable().
gradient_squared_accumulator: Should be from a Variable().
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
lr: Learning rate. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
global_step: Training step number. Must be a scalar.
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");

} // namespace tensorflow
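Aside (editorial note, not part of the diff): ApplyAdagradDAShapeFn above is shared by the dense and sparse ops, and the `int idx = sparse ? 5 : 4;` line simply encodes where the scalar inputs begin once the optional `indices` input is inserted after `grad`. Spelled out from the REGISTER_OP input lists above (the constant names below are my own):

// Dense ApplyAdagradDA inputs:  0 var, 1 gradient_accumulator,
//   2 gradient_squared_accumulator, 3 grad, 4 lr, 5 l1, 6 l2, 7 global_step.
// SparseApplyAdagradDA inputs:  0..3 as above, 4 indices, then
//   5 lr, 6 l1, 7 l2, 8 global_step.
constexpr int kDenseScalarStart = 4;   // matches `sparse ? 5 : 4` when sparse == false
constexpr int kSparseScalarStart = 5;  // matches it when sparse == true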
@@ -1,147 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"

namespace tensorflow {

static Status ApplyAdagradShapeFn(InferenceContext* c, bool sparse) {
  ShapeHandle unused;
  ShapeHandle s = ShapeOrHandleShape(c, 0);  // var
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s));  // accum
  TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));  // lr
  TF_RETURN_IF_ERROR(
      HandleGradAndIndicesInputs(c, sparse, 3 /* grad_idx */, &s));
  if (c->num_outputs() > 0) {
    c->set_output(0, s);
  }
  return Status::OK();
}

REGISTER_OP("ApplyAdagrad")
    .Input("var: Ref(T)")
    .Input("accum: Ref(T)")
    .Input("lr: T")
    .Input("grad: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdagradShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the adagrad scheme.

accum += grad * grad
var -= lr * grad * (1 / sqrt(accum))

var: Should be from a Variable().
accum: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
grad: The gradient.
out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("ResourceApplyAdagrad")
    .Input("var: resource")
    .Input("accum: resource")
    .Input("lr: T")
    .Input("grad: T")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdagradShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the adagrad scheme.

accum += grad * grad
var -= lr * grad * (1 / sqrt(accum))

var: Should be from a Variable().
accum: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
grad: The gradient.
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("SparseApplyAdagrad")
    .Input("var: Ref(T)")
    .Input("accum: Ref(T)")
    .Input("lr: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdagradShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update relevant entries in '*var' and '*accum' according to the adagrad scheme.

That is for rows we have grad for, we update var and accum as follows:
accum += grad * grad
var -= lr * grad * (1 / sqrt(accum))

var: Should be from a Variable().
accum: Should be from a Variable().
lr: Learning rate. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("ResourceSparseApplyAdagrad")
    .Input("var: resource")
    .Input("accum: resource")
    .Input("lr: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdagradShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update relevant entries in '*var' and '*accum' according to the adagrad scheme.

That is for rows we have grad for, we update var and accum as follows:
accum += grad * grad
var -= lr * grad * (1 / sqrt(accum))

var: Should be from a Variable().
accum: Should be from a Variable().
lr: Learning rate. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

} // namespace tensorflow
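Aside (editorial note, not part of the diff): the Adagrad doc strings above reduce to this small scalar C++ sketch; the naming is mine.

#include <cmath>

// Scalar sketch of the quoted rule:
//   accum += grad^2
//   var   -= lr * grad * (1 / sqrt(accum))
void AdagradStep(float* var, float* accum, float lr, float grad) {
  *accum += grad * grad;
  *var -= lr * grad * (1.0f / std::sqrt(*accum));
}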
@@ -1,125 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"

namespace tensorflow {

static Status ApplyAdamShapeFn(InferenceContext* c, bool sparse) {
  ShapeHandle unused;
  ShapeHandle s = ShapeOrHandleShape(c, 0);  // var
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s));  // m
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 2), &s));  // v
  TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));  // beta1_power
  TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));  // beta2_power
  TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));  // lr
  TF_RETURN_IF_ERROR(c->WithRank(c->input(6), 0, &unused));  // beta1
  TF_RETURN_IF_ERROR(c->WithRank(c->input(7), 0, &unused));  // beta2
  TF_RETURN_IF_ERROR(c->WithRank(c->input(8), 0, &unused));  // epsilon
  TF_RETURN_IF_ERROR(
      HandleGradAndIndicesInputs(c, sparse, 9 /* grad_idx */, &s));
  if (c->num_outputs() > 0) {
    c->set_output(0, s);
  }
  return Status::OK();
}

REGISTER_OP("ApplyAdam")
    .Input("var: Ref(T)")
    .Input("m: Ref(T)")
    .Input("v: Ref(T)")
    .Input("beta1_power: T")
    .Input("beta2_power: T")
    .Input("lr: T")
    .Input("beta1: T")
    .Input("beta2: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .Attr("use_nesterov: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdamShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the Adam algorithm.

lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)
m_t <- beta1 * m_{t-1} + (1 - beta1) * g_t
v_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t
variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)

var: Should be from a Variable().
m: Should be from a Variable().
v: Should be from a Variable().
beta1_power: Must be a scalar.
beta2_power: Must be a scalar.
lr: Scaling factor. Must be a scalar.
beta1: Momentum factor. Must be a scalar.
beta2: Momentum factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
grad: The gradient.
out: Same as "var".
use_locking: If `True`, updating of the var, m, and v tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
use_nesterov: If `True`, uses the nesterov update.
)doc");

REGISTER_OP("ResourceApplyAdam")
    .Input("var: resource")
    .Input("m: resource")
    .Input("v: resource")
    .Input("beta1_power: T")
    .Input("beta2_power: T")
    .Input("lr: T")
    .Input("beta1: T")
    .Input("beta2: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .Attr("use_nesterov: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyAdamShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the Adam algorithm.

lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)
m_t <- beta1 * m_{t-1} + (1 - beta1) * g_t
v_t <- beta2 * v_{t-1} + (1 - beta2) * g_t * g_t
variable <- variable - lr_t * m_t / (sqrt(v_t) + epsilon)

var: Should be from a Variable().
m: Should be from a Variable().
v: Should be from a Variable().
beta1_power: Must be a scalar.
beta2_power: Must be a scalar.
lr: Scaling factor. Must be a scalar.
beta1: Momentum factor. Must be a scalar.
beta2: Momentum factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
grad: The gradient.
use_locking: If `True`, updating of the var, m, and v tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
use_nesterov: If `True`, uses the nesterov update.
)doc");

} // namespace tensorflow
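Aside (editorial note, not part of the diff): a scalar C++ sketch of the Adam update quoted in the doc strings above; beta1_power and beta2_power are the beta1^t and beta2^t values the caller feeds in, and the naming is mine.

#include <cmath>

// lr_t  = lr * sqrt(1 - beta2^t) / (1 - beta1^t)
// m     = beta1 * m + (1 - beta1) * g
// v     = beta2 * v + (1 - beta2) * g^2
// var  -= lr_t * m / (sqrt(v) + epsilon)
void AdamStep(float* var, float* m, float* v, float lr, float beta1,
              float beta2, float beta1_power, float beta2_power, float epsilon,
              float g) {
  float lr_t = lr * std::sqrt(1.0f - beta2_power) / (1.0f - beta1_power);
  *m = beta1 * *m + (1.0f - beta1) * g;
  *v = beta2 * *v + (1.0f - beta2) * g * g;
  *var -= lr_t * *m / (std::sqrt(*v) + epsilon);
}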
@@ -1,368 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"

namespace tensorflow {

static Status ApplyFtrlShapeFn(InferenceContext* c, bool sparse) {
  ShapeHandle unused;
  ShapeHandle s = ShapeOrHandleShape(c, 0);  // var
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s));  // accum
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 2), &s));  // linear
  TF_RETURN_IF_ERROR(
      HandleGradAndIndicesInputs(c, sparse, 3 /* grad_idx */, &s));
  int idx = sparse ? 5 : 4;
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // lr
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // l1
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // l2
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // lr_power
  if (c->num_outputs() > 0) {
    c->set_output(0, s);
  }
  return Status::OK();
}

REGISTER_OP("ApplyFtrl")
    .Input("var: Ref(T)")
    .Input("accum: Ref(T)")
    .Input("linear: Ref(T)")
    .Input("grad: T")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("lr_power: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyFtrlShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the Ftrl-proximal scheme.

accum_new = accum + grad * grad
linear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
accum = accum_new

var: Should be from a Variable().
accum: Should be from a Variable().
linear: Should be from a Variable().
grad: The gradient.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
lr_power: Scaling factor. Must be a scalar.
out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("SparseApplyFtrl")
    .Input("var: Ref(T)")
    .Input("accum: Ref(T)")
    .Input("linear: Ref(T)")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("lr_power: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyFtrlShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update relevant entries in '*var' according to the Ftrl-proximal scheme.

That is for rows we have grad for, we update var, accum and linear as follows:
accum_new = accum + grad * grad
linear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
accum = accum_new

var: Should be from a Variable().
accum: Should be from a Variable().
linear: Should be from a Variable().
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
lr_power: Scaling factor. Must be a scalar.
out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("ResourceApplyFtrl")
    .Input("var: resource")
    .Input("accum: resource")
    .Input("linear: resource")
    .Input("grad: T")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("lr_power: T")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyFtrlShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the Ftrl-proximal scheme.

accum_new = accum + grad * grad
linear += grad - (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
accum = accum_new

var: Should be from a Variable().
accum: Should be from a Variable().
linear: Should be from a Variable().
grad: The gradient.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
lr_power: Scaling factor. Must be a scalar.
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("ResourceSparseApplyFtrl")
    .Input("var: resource")
    .Input("accum: resource")
    .Input("linear: resource")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("lr_power: T")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyFtrlShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update relevant entries in '*var' according to the Ftrl-proximal scheme.

That is for rows we have grad for, we update var, accum and linear as follows:
accum_new = accum + grad * grad
linear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
accum = accum_new

var: Should be from a Variable().
accum: Should be from a Variable().
linear: Should be from a Variable().
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
lr_power: Scaling factor. Must be a scalar.
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("ApplyFtrlV2")
    .Input("var: Ref(T)")
    .Input("accum: Ref(T)")
    .Input("linear: Ref(T)")
    .Input("grad: T")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("l2_shrinkage: T")
    .Input("lr_power: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyFtrlShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the Ftrl-proximal scheme.

grad_with_shrinkage = grad + 2 * l2_shrinkage * var
accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
linear += grad_with_shrinkage +
    (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
accum = accum_new

var: Should be from a Variable().
accum: Should be from a Variable().
linear: Should be from a Variable().
grad: The gradient.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: online L2 regularization. Must be a scalar.
l2_shrinkage: L2 shrinkage regularization. Must be a scalar.
lr_power: Scaling factor. Must be a scalar.
out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("SparseApplyFtrlV2")
    .Input("var: Ref(T)")
    .Input("accum: Ref(T)")
    .Input("linear: Ref(T)")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("l2_shrinkage: T")
    .Input("lr_power: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyFtrlShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update relevant entries in '*var' according to the Ftrl-proximal scheme.

That is for rows we have grad for, we update var, accum and linear as follows:
grad_with_shrinkage = grad + 2 * l2_shrinkage * var
accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
linear += grad_with_shrinkage +
    (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
accum = accum_new

var: Should be from a Variable().
accum: Should be from a Variable().
linear: Should be from a Variable().
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: online L2 regularization. Must be a scalar.
l2_shrinkage: L2 shrinkage regularization. Must be a scalar.
lr_power: Scaling factor. Must be a scalar.
out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("ResourceApplyFtrlV2")
    .Input("var: resource")
    .Input("accum: resource")
    .Input("linear: resource")
    .Input("grad: T")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("l2_shrinkage: T")
    .Input("lr_power: T")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyFtrlShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the Ftrl-proximal scheme.

grad_with_shrinkage = grad + 2 * l2_shrinkage * var
accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
linear += grad_with_shrinkage +
    (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
accum = accum_new

var: Should be from a Variable().
accum: Should be from a Variable().
linear: Should be from a Variable().
grad: The gradient.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: online L2 regularization. Must be a scalar.
l2_shrinkage: L2 shrinkage regularization. Must be a scalar.
lr_power: Scaling factor. Must be a scalar.
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("ResourceSparseApplyFtrlV2")
    .Input("var: resource")
    .Input("accum: resource")
    .Input("linear: resource")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("l2_shrinkage: T")
    .Input("lr_power: T")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyFtrlShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update relevant entries in '*var' according to the Ftrl-proximal scheme.

That is for rows we have grad for, we update var, accum and linear as follows:
grad_with_shrinkage = grad + 2 * l2_shrinkage * var
accum_new = accum + grad_with_shrinkage * grad_with_shrinkage
linear += grad_with_shrinkage +
    (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var
quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2
var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0
accum = accum_new

var: Should be from a Variable().
accum: Should be from a Variable().
linear: Should be from a Variable().
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: online L2 regularization. Must be a scalar.
l2_shrinkage: L2 shrinkage regularization. Must be a scalar.
lr_power: Scaling factor. Must be a scalar.
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

} // namespace tensorflow
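Aside (editorial note, not part of the diff): a scalar C++ sketch of the plain Ftrl-proximal update described above, without the V2 l2_shrinkage term. The doc strings above disagree on the sign in front of the (accum_new^(-lr_power) - accum^(-lr_power)) term; this sketch follows the '-' form used in the ResourceApplyFtrl doc, and the naming (including `sigma`) is mine.

#include <cmath>

void FtrlStep(float* var, float* accum, float* linear, float grad, float lr,
              float l1, float l2, float lr_power) {
  float accum_new = *accum + grad * grad;
  // sigma = (accum_new^(-lr_power) - accum^(-lr_power)) / lr
  float sigma =
      (std::pow(accum_new, -lr_power) - std::pow(*accum, -lr_power)) / lr;
  *linear += grad - sigma * *var;
  // quadratic = 1 / (accum_new^(lr_power) * lr) + 2 * l2
  float quadratic = 1.0f / (std::pow(accum_new, lr_power) * lr) + 2.0f * l2;
  // var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0
  float sign = (*linear >= 0.0f) ? 1.0f : -1.0f;
  *var = (std::fabs(*linear) > l1) ? (sign * l1 - *linear) / quadratic : 0.0f;
  *accum = accum_new;
}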
@@ -1,69 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"

namespace tensorflow {

static Status ApplyGradientDescentShapeFn(InferenceContext* c) {
  ShapeHandle unused;
  ShapeHandle s = ShapeOrHandleShape(c, 0);  // var
  TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));  // alpha
  TF_RETURN_IF_ERROR(c->Merge(s, c->input(2), &s));  // delta
  if (c->num_outputs() > 0) {
    c->set_output(0, s);
  }
  return Status::OK();
}

REGISTER_OP("ApplyGradientDescent")
    .Input("var: Ref(T)")
    .Input("alpha: T")
    .Input("delta: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn(ApplyGradientDescentShapeFn)
    .Doc(R"doc(
Update '*var' by subtracting 'alpha' * 'delta' from it.

var: Should be from a Variable().
alpha: Scaling factor. Must be a scalar.
delta: The change.
out: Same as "var".
use_locking: If `True`, the subtraction will be protected by a lock;
otherwise the behavior is undefined, but may exhibit less contention.
)doc");

REGISTER_OP("ResourceApplyGradientDescent")
    .Input("var: resource")
    .Input("alpha: T")
    .Input("delta: T")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn(ApplyGradientDescentShapeFn)
    .Doc(R"doc(
Update '*var' by subtracting 'alpha' * 'delta' from it.

var: Should be from a Variable().
alpha: Scaling factor. Must be a scalar.
delta: The change.
use_locking: If `True`, the subtraction will be protected by a lock;
otherwise the behavior is undefined, but may exhibit less contention.
)doc");

} // namespace tensorflow
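Aside (editorial note, not part of the diff): the gradient descent ops above implement the one-line rule var -= alpha * delta; as a scalar C++ sketch (naming mine):

void GradientDescentStep(float* var, float alpha, float delta) {
  *var -= alpha * delta;  // Update '*var' by subtracting 'alpha' * 'delta'.
}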
@@ -1,179 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"

namespace tensorflow {

static Status ApplyMomentumShapeFn(InferenceContext* c, bool sparse) {
  ShapeHandle unused;
  ShapeHandle s = ShapeOrHandleShape(c, 0);  // var
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s));  // accum
  TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));  // lr
  TF_RETURN_IF_ERROR(
      HandleGradAndIndicesInputs(c, sparse, 3 /* grad_idx */, &s));
  int idx = sparse ? 5 : 4;
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // momentum
  if (c->num_outputs() > 0) {
    c->set_output(0, s);
  }
  return Status::OK();
}

REGISTER_OP("ApplyMomentum")
    .Input("var: Ref(T)")
    .Input("accum: Ref(T)")
    .Input("lr: T")
    .Input("grad: T")
    .Input("momentum: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .Attr("use_nesterov: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyMomentumShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the momentum scheme. Set use_nesterov = True if you
want to use Nesterov momentum.

accum = accum * momentum + grad
var -= lr * accum

var: Should be from a Variable().
accum: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
grad: The gradient.
momentum: Momentum. Must be a scalar.
out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
use_nesterov: If `True`, the tensor passed to compute grad will be
var - lr * momentum * accum, so in the end, the var you get is actually
var - lr * momentum * accum.
)doc");

REGISTER_OP("SparseApplyMomentum")
    .Input("var: Ref(T)")
    .Input("accum: Ref(T)")
    .Input("lr: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Input("momentum: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .Attr("use_nesterov: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyMomentumShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update relevant entries in '*var' and '*accum' according to the momentum scheme.
Set use_nesterov = True if you want to use Nesterov momentum.

That is for rows we have grad for, we update var and accum as follows:

accum = accum * momentum + grad
var -= lr * accum

var: Should be from a Variable().
accum: Should be from a Variable().
lr: Learning rate. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
momentum: Momentum. Must be a scalar.
out: Same as "var".
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
use_nesterov: If `True`, the tensor passed to compute grad will be
var - lr * momentum * accum, so in the end, the var you get is actually
var - lr * momentum * accum.
)doc");

REGISTER_OP("ResourceApplyMomentum")
    .Input("var: resource")
    .Input("accum: resource")
    .Input("lr: T")
    .Input("grad: T")
    .Input("momentum: T")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .Attr("use_nesterov: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyMomentumShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the momentum scheme. Set use_nesterov = True if you
want to use Nesterov momentum.

accum = accum * momentum + grad
var -= lr * accum

var: Should be from a Variable().
accum: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
grad: The gradient.
momentum: Momentum. Must be a scalar.
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
use_nesterov: If `True`, the tensor passed to compute grad will be
var - lr * momentum * accum, so in the end, the var you get is actually
var - lr * momentum * accum.
)doc");

REGISTER_OP("ResourceSparseApplyMomentum")
    .Input("var: resource")
    .Input("accum: resource")
    .Input("lr: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Input("momentum: T")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .Attr("use_nesterov: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyMomentumShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update relevant entries in '*var' and '*accum' according to the momentum scheme.
Set use_nesterov = True if you want to use Nesterov momentum.

That is for rows we have grad for, we update var and accum as follows:

accum = accum * momentum + grad
var -= lr * accum

var: Should be from a Variable().
accum: Should be from a Variable().
lr: Learning rate. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
momentum: Momentum. Must be a scalar.
use_locking: If `True`, updating of the var and accum tensors will be protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
use_nesterov: If `True`, the tensor passed to compute grad will be
var - lr * momentum * accum, so in the end, the var you get is actually
var - lr * momentum * accum.
)doc");

} // namespace tensorflow
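Aside (editorial note, not part of the diff): a scalar C++ sketch of the momentum rule quoted above, covering only the use_nesterov = false case described by the equations; the naming is mine.

void MomentumStep(float* var, float* accum, float lr, float grad,
                  float momentum) {
  *accum = *accum * momentum + grad;  // accum = accum * momentum + grad
  *var -= lr * *accum;                // var  -= lr * accum
}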
@ -1,183 +0,0 @@
|
||||
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/core/framework/op.h"
|
||||
#include "tensorflow/core/framework/shape_inference.h"
|
||||
#include "tensorflow/core/ops/training_ops.h"
|
||||
|
||||
namespace tensorflow {
|
||||
|
||||
static Status ApplyProximalAdagradShapeFn(InferenceContext* c, bool sparse) {
|
||||
ShapeHandle unused;
|
||||
ShapeHandle s = ShapeOrHandleShape(c, 0); // var
|
||||
TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s)); // accum
|
||||
TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); // lr
|
||||
TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); // l1
|
||||
TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); // l2
|
||||
TF_RETURN_IF_ERROR(
|
||||
HandleGradAndIndicesInputs(c, sparse, 5 /* grad_idx */, &s));
|
||||
if (c->num_outputs() > 0) {
|
||||
c->set_output(0, s);
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
REGISTER_OP("ApplyProximalAdagrad")
|
||||
.Input("var: Ref(T)")
|
||||
.Input("accum: Ref(T)")
|
||||
.Input("lr: T")
|
||||
.Input("l1: T")
|
||||
.Input("l2: T")
|
||||
.Input("grad: T")
|
||||
.Output("out: Ref(T)")
|
||||
.Attr("T: numbertype")
|
||||
.Attr("use_locking: bool = false")
|
||||
.SetShapeFn([](InferenceContext* c) {
|
||||
return ApplyProximalAdagradShapeFn(c, false /* sparse */);
|
||||
})
|
||||
.Doc(R"doc(
|
||||
Update '*var' and '*accum' according to FOBOS with Adagrad learning rate.
|
||||
accum += grad * grad
|
||||
prox_v = var - lr * grad * (1 / sqrt(accum))
|
||||
var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}
|
||||
|
||||
var: Should be from a Variable().
|
||||
accum: Should be from a Variable().
|
||||
grad: The gradient.
|
||||
lr: Scaling factor. Must be a scalar.
|
||||
l1: L1 regularization. Must be a scalar.
|
||||
l2: L2 regularization. Must be a scalar.
|
||||
out: Same as "var".
|
||||
use_locking: If True, updating of the var and accum tensors will be protected by
|
||||
a lock; otherwise the behavior is undefined, but may exhibit less contention.
|
||||
)doc");
|

REGISTER_OP("ResourceApplyProximalAdagrad")
    .Input("var: resource")
    .Input("accum: resource")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("grad: T")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyProximalAdagradShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' and '*accum' according to FOBOS with Adagrad learning rate.
accum += grad * grad
prox_v = var - lr * grad * (1 / sqrt(accum))
var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}

var: Should be from a Variable().
accum: Should be from a Variable().
grad: The gradient.
lr: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");

REGISTER_OP("SparseApplyProximalAdagrad")
    .Input("var: Ref(T)")
    .Input("accum: Ref(T)")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyProximalAdagradShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Sparse update entries in '*var' and '*accum' according to FOBOS algorithm.

That is for rows we have grad for, we update var and accum as follows:
accum += grad * grad
prox_v = var
prox_v -= lr * grad * (1 / sqrt(accum))
var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}

var: Should be from a Variable().
accum: Should be from a Variable().
lr: Learning rate. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
out: Same as "var".
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");

REGISTER_OP("ResourceSparseApplyProximalAdagrad")
    .Input("var: resource")
    .Input("accum: resource")
    .Input("lr: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyProximalAdagradShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Sparse update entries in '*var' and '*accum' according to FOBOS algorithm.

That is for rows we have grad for, we update var and accum as follows:
accum += grad * grad
prox_v = var
prox_v -= lr * grad * (1 / sqrt(accum))
var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}

var: Should be from a Variable().
accum: Should be from a Variable().
lr: Learning rate. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
use_locking: If True, updating of the var and accum tensors will be protected by
a lock; otherwise the behavior is undefined, but may exhibit less contention.
)doc");

static Status ApplyFtrlShapeFn(InferenceContext* c, bool sparse) {
  ShapeHandle unused;
  ShapeHandle s = ShapeOrHandleShape(c, 0);  // var
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s));  // accum
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 2), &s));  // linear
  TF_RETURN_IF_ERROR(
      HandleGradAndIndicesInputs(c, sparse, 3 /* grad_idx */, &s));
  int idx = sparse ? 5 : 4;
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // lr
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // l1
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // l2
  TF_RETURN_IF_ERROR(c->WithRank(c->input(idx++), 0, &unused));  // lr_power
  if (c->num_outputs() > 0) {
    c->set_output(0, s);
  }
  return Status::OK();
}

} // namespace tensorflow

@ -1,151 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"

namespace tensorflow {

static Status ApplyProximalGradientDescentShapeFn(InferenceContext* c,
                                                  bool sparse) {
  ShapeHandle unused;
  ShapeHandle s = ShapeOrHandleShape(c, 0);  // var
  TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));  // alpha
  TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));  // l1
  TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));  // l2
  TF_RETURN_IF_ERROR(
      HandleGradAndIndicesInputs(c, sparse, 4 /* grad_idx */, &s));
  if (c->num_outputs() > 0) {
    c->set_output(0, s);
  }
  return Status::OK();
}

REGISTER_OP("ApplyProximalGradientDescent")
    .Input("var: Ref(T)")
    .Input("alpha: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("delta: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyProximalGradientDescentShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' as FOBOS algorithm with fixed learning rate.
prox_v = var - alpha * delta
var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}

var: Should be from a Variable().
alpha: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
delta: The change.
out: Same as "var".
use_locking: If True, the subtraction will be protected by a lock;
otherwise the behavior is undefined, but may exhibit less contention.
)doc");
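
For a concrete feel of the soft-thresholding step in this update, here is a tiny worked example with illustrative values only: alpha * l1 = 0.02 is subtracted from |prox_v| before the sign is restored.

import numpy as np

# Scalar example of the documented update (illustrative values only).
var, delta, alpha, l1, l2 = 1.0, 0.5, 0.1, 0.2, 0.0
prox_v = var - alpha * delta                                   # 0.95
var = np.sign(prox_v) / (1 + alpha * l2) * max(abs(prox_v) - alpha * l1, 0.0)
print(var)                                                     # -> 0.93 (up to float rounding)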

REGISTER_OP("SparseApplyProximalGradientDescent")
    .Input("var: Ref(T)")
    .Input("alpha: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyProximalGradientDescentShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Sparse update '*var' as FOBOS algorithm with fixed learning rate.

That is for rows we have grad for, we update var as follows:
prox_v = var - alpha * grad
var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}

var: Should be from a Variable().
alpha: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
out: Same as "var".
use_locking: If True, the subtraction will be protected by a lock;
otherwise the behavior is undefined, but may exhibit less contention.
)doc");

REGISTER_OP("ResourceApplyProximalGradientDescent")
    .Input("var: resource")
    .Input("alpha: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("delta: T")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyProximalGradientDescentShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' as FOBOS algorithm with fixed learning rate.
prox_v = var - alpha * delta
var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}

var: Should be from a Variable().
alpha: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
delta: The change.
use_locking: If True, the subtraction will be protected by a lock;
otherwise the behavior is undefined, but may exhibit less contention.
)doc");

REGISTER_OP("ResourceSparseApplyProximalGradientDescent")
    .Input("var: resource")
    .Input("alpha: T")
    .Input("l1: T")
    .Input("l2: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyProximalGradientDescentShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Sparse update '*var' as FOBOS algorithm with fixed learning rate.

That is for rows we have grad for, we update var as follows:
prox_v = var - alpha * grad
var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}

var: Should be from a Variable().
alpha: Scaling factor. Must be a scalar.
l1: L1 regularization. Must be a scalar.
l2: L2 regularization. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var and accum.
use_locking: If True, the subtraction will be protected by a lock;
otherwise the behavior is undefined, but may exhibit less contention.
)doc");

} // namespace tensorflow

@ -1,425 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"

namespace tensorflow {

static Status ApplyRMSPropShapeFn(InferenceContext* c, bool sparse) {
  ShapeHandle unused;
  ShapeHandle s = ShapeOrHandleShape(c, 0);  // var
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s));  // ms
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 2), &s));  // mom
  TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused));  // lr
  TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));  // rho
  TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));  // momentum
  TF_RETURN_IF_ERROR(c->WithRank(c->input(6), 0, &unused));  // epsilon
  TF_RETURN_IF_ERROR(
      HandleGradAndIndicesInputs(c, sparse, 7 /* grad_idx */, &s));
  if (c->num_outputs() > 0) {
    c->set_output(0, s);
  }
  return Status::OK();
}

static Status ApplyCenteredRMSPropShapeFn(InferenceContext* c, bool sparse) {
  ShapeHandle unused;
  ShapeHandle s = ShapeOrHandleShape(c, 0);  // var
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 1), &s));  // ms
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 2), &s));  // mg
  TF_RETURN_IF_ERROR(c->Merge(s, ShapeOrHandleShape(c, 3), &s));  // mom
  TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused));  // lr
  TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused));  // rho
  TF_RETURN_IF_ERROR(c->WithRank(c->input(6), 0, &unused));  // momentum
  TF_RETURN_IF_ERROR(c->WithRank(c->input(7), 0, &unused));  // epsilon
  TF_RETURN_IF_ERROR(
      HandleGradAndIndicesInputs(c, sparse, 8 /* grad_idx */, &s));
  if (c->num_outputs() > 0) {
    c->set_output(0, s);
  }
  return Status::OK();
}

REGISTER_OP("ApplyRMSProp")
    .Input("var: Ref(T)")
    .Input("ms: Ref(T)")
    .Input("mom: Ref(T)")
    .Input("lr: T")
    .Input("rho: T")
    .Input("momentum: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyRMSPropShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the RMSProp algorithm.
Note that in dense implementation of this algorithm, ms and mom will
update even if the grad is zero, but in this sparse implementation, ms
and mom will not update in iterations during which the grad is zero.

mean_square = decay * mean_square + (1-decay) * gradient ** 2
Delta = learning_rate * gradient / sqrt(mean_square + epsilon)

ms <- rho * ms_{t-1} + (1-rho) * grad * grad
mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
var <- var - mom

var: Should be from a Variable().
ms: Should be from a Variable().
mom: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
rho: Decay rate. Must be a scalar.
grad: The gradient.
out: Same as "var".
use_locking: If `True`, updating of the var, ms, and mom tensors is protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");
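
The dense RMSProp update documented above can be sketched in NumPy; this is only an illustration of the documented equations under the assumption of dense float arrays, not the registered kernel (which also honors use_locking and backs the sparse/resource variants).

import numpy as np

def rms_prop_step(var, ms, mom, grad, lr, rho, momentum, epsilon):
  """One dense RMSProp step mirroring the documented equations."""
  ms[:] = rho * ms + (1.0 - rho) * grad * grad
  mom[:] = momentum * mom + lr * grad / np.sqrt(ms + epsilon)
  var[:] = var - mom
  return var, ms, mom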

REGISTER_OP("ApplyCenteredRMSProp")
    .Input("var: Ref(T)")
    .Input("mg: Ref(T)")
    .Input("ms: Ref(T)")
    .Input("mom: Ref(T)")
    .Input("lr: T")
    .Input("rho: T")
    .Input("momentum: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyCenteredRMSPropShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the centered RMSProp algorithm.
The centered RMSProp algorithm uses an estimate of the centered second moment
(i.e., the variance) for normalization, as opposed to regular RMSProp, which
uses the (uncentered) second moment. This often helps with training, but is
slightly more expensive in terms of computation and memory.

Note that in dense implementation of this algorithm, mg, ms, and mom will
update even if the grad is zero, but in this sparse implementation, mg, ms,
and mom will not update in iterations during which the grad is zero.

mean_square = decay * mean_square + (1-decay) * gradient ** 2
mean_grad = decay * mean_grad + (1-decay) * gradient

Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)

mg <- rho * mg_{t-1} + (1-rho) * grad
ms <- rho * ms_{t-1} + (1-rho) * grad * grad
mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)
var <- var - mom

var: Should be from a Variable().
mg: Should be from a Variable().
ms: Should be from a Variable().
mom: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
rho: Decay rate. Must be a scalar.
grad: The gradient.
out: Same as "var".
use_locking: If `True`, updating of the var, mg, ms, and mom tensors is
protected by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");
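
The centered variant only adds a running mean of the gradient and subtracts its square inside the square root; a matching NumPy sketch under the same assumptions as before (illustrative names, dense arrays):

import numpy as np

def centered_rms_prop_step(var, mg, ms, mom, grad, lr, rho, momentum, epsilon):
  """One dense centered RMSProp step: normalize by an estimate of the variance."""
  mg[:] = rho * mg + (1.0 - rho) * grad
  ms[:] = rho * ms + (1.0 - rho) * grad * grad
  mom[:] = momentum * mom + lr * grad / np.sqrt(ms - mg * mg + epsilon)
  var[:] = var - mom
  return var, mg, ms, mom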

REGISTER_OP("SparseApplyRMSProp")
    .Input("var: Ref(T)")
    .Input("ms: Ref(T)")
    .Input("mom: Ref(T)")
    .Input("lr: T")
    .Input("rho: T")
    .Input("momentum: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyRMSPropShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the RMSProp algorithm.
Note that in dense implementation of this algorithm, ms and mom will
update even if the grad is zero, but in this sparse implementation, ms
and mom will not update in iterations during which the grad is zero.

mean_square = decay * mean_square + (1-decay) * gradient ** 2
Delta = learning_rate * gradient / sqrt(mean_square + epsilon)

ms <- rho * ms_{t-1} + (1-rho) * grad * grad
mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
var <- var - mom

var: Should be from a Variable().
ms: Should be from a Variable().
mom: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
rho: Decay rate. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var, ms and mom.
out: Same as "var".
use_locking: If `True`, updating of the var, ms, and mom tensors is protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("SparseApplyCenteredRMSProp")
    .Input("var: Ref(T)")
    .Input("mg: Ref(T)")
    .Input("ms: Ref(T)")
    .Input("mom: Ref(T)")
    .Input("lr: T")
    .Input("rho: T")
    .Input("momentum: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Output("out: Ref(T)")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyCenteredRMSPropShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the centered RMSProp algorithm.
The centered RMSProp algorithm uses an estimate of the centered second moment
(i.e., the variance) for normalization, as opposed to regular RMSProp, which
uses the (uncentered) second moment. This often helps with training, but is
slightly more expensive in terms of computation and memory.

Note that in dense implementation of this algorithm, mg, ms, and mom will
update even if the grad is zero, but in this sparse implementation, mg, ms,
and mom will not update in iterations during which the grad is zero.

mean_square = decay * mean_square + (1-decay) * gradient ** 2
mean_grad = decay * mean_grad + (1-decay) * gradient
Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)

ms <- rho * ms_{t-1} + (1-rho) * grad * grad
mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
var <- var - mom

var: Should be from a Variable().
mg: Should be from a Variable().
ms: Should be from a Variable().
mom: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
rho: Decay rate. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var, ms and mom.
out: Same as "var".
use_locking: If `True`, updating of the var, mg, ms, and mom tensors is
protected by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("ResourceApplyRMSProp")
    .Input("var: resource")
    .Input("ms: resource")
    .Input("mom: resource")
    .Input("lr: T")
    .Input("rho: T")
    .Input("momentum: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyRMSPropShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the RMSProp algorithm.
Note that in dense implementation of this algorithm, ms and mom will
update even if the grad is zero, but in this sparse implementation, ms
and mom will not update in iterations during which the grad is zero.

mean_square = decay * mean_square + (1-decay) * gradient ** 2
Delta = learning_rate * gradient / sqrt(mean_square + epsilon)

ms <- rho * ms_{t-1} + (1-rho) * grad * grad
mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
var <- var - mom

var: Should be from a Variable().
ms: Should be from a Variable().
mom: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
rho: Decay rate. Must be a scalar.
grad: The gradient.
use_locking: If `True`, updating of the var, ms, and mom tensors is protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("ResourceApplyCenteredRMSProp")
    .Input("var: resource")
    .Input("mg: resource")
    .Input("ms: resource")
    .Input("mom: resource")
    .Input("lr: T")
    .Input("rho: T")
    .Input("momentum: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Attr("T: numbertype")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyCenteredRMSPropShapeFn(c, false /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the centered RMSProp algorithm.
The centered RMSProp algorithm uses an estimate of the centered second moment
(i.e., the variance) for normalization, as opposed to regular RMSProp, which
uses the (uncentered) second moment. This often helps with training, but is
slightly more expensive in terms of computation and memory.

Note that in dense implementation of this algorithm, mg, ms, and mom will
update even if the grad is zero, but in this sparse implementation, mg, ms,
and mom will not update in iterations during which the grad is zero.

mean_square = decay * mean_square + (1-decay) * gradient ** 2
mean_grad = decay * mean_grad + (1-decay) * gradient

Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)

mg <- rho * mg_{t-1} + (1-rho) * grad
ms <- rho * ms_{t-1} + (1-rho) * grad * grad
mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms - mg * mg + epsilon)
var <- var - mom

var: Should be from a Variable().
mg: Should be from a Variable().
ms: Should be from a Variable().
mom: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
rho: Decay rate. Must be a scalar.
grad: The gradient.
use_locking: If `True`, updating of the var, mg, ms, and mom tensors is
protected by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("ResourceSparseApplyRMSProp")
    .Input("var: resource")
    .Input("ms: resource")
    .Input("mom: resource")
    .Input("lr: T")
    .Input("rho: T")
    .Input("momentum: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyRMSPropShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the RMSProp algorithm.
Note that in dense implementation of this algorithm, ms and mom will
update even if the grad is zero, but in this sparse implementation, ms
and mom will not update in iterations during which the grad is zero.

mean_square = decay * mean_square + (1-decay) * gradient ** 2
Delta = learning_rate * gradient / sqrt(mean_square + epsilon)

ms <- rho * ms_{t-1} + (1-rho) * grad * grad
mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
var <- var - mom

var: Should be from a Variable().
ms: Should be from a Variable().
mom: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
rho: Decay rate. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var, ms and mom.
use_locking: If `True`, updating of the var, ms, and mom tensors is protected
by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

REGISTER_OP("ResourceSparseApplyCenteredRMSProp")
    .Input("var: resource")
    .Input("mg: resource")
    .Input("ms: resource")
    .Input("mom: resource")
    .Input("lr: T")
    .Input("rho: T")
    .Input("momentum: T")
    .Input("epsilon: T")
    .Input("grad: T")
    .Input("indices: Tindices")
    .Attr("T: numbertype")
    .Attr("Tindices: {int32, int64}")
    .Attr("use_locking: bool = false")
    .SetShapeFn([](InferenceContext* c) {
      return ApplyCenteredRMSPropShapeFn(c, true /* sparse */);
    })
    .Doc(R"doc(
Update '*var' according to the centered RMSProp algorithm.
The centered RMSProp algorithm uses an estimate of the centered second moment
(i.e., the variance) for normalization, as opposed to regular RMSProp, which
uses the (uncentered) second moment. This often helps with training, but is
slightly more expensive in terms of computation and memory.

Note that in dense implementation of this algorithm, mg, ms, and mom will
update even if the grad is zero, but in this sparse implementation, mg, ms,
and mom will not update in iterations during which the grad is zero.

mean_square = decay * mean_square + (1-decay) * gradient ** 2
mean_grad = decay * mean_grad + (1-decay) * gradient
Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2)

ms <- rho * ms_{t-1} + (1-rho) * grad * grad
mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)
var <- var - mom

var: Should be from a Variable().
mg: Should be from a Variable().
ms: Should be from a Variable().
mom: Should be from a Variable().
lr: Scaling factor. Must be a scalar.
epsilon: Ridge term. Must be a scalar.
rho: Decay rate. Must be a scalar.
grad: The gradient.
indices: A vector of indices into the first dimension of var, ms and mom.
use_locking: If `True`, updating of the var, mg, ms, and mom tensors is
protected by a lock; otherwise the behavior is undefined, but may exhibit less
contention.
)doc");

} // namespace tensorflow

tensorflow/core/ops/training_ops.cc — new file, 1799 lines; file diff suppressed because it is too large.

@ -1,64 +0,0 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef THIRD_PARTY_TENSORFLOW_CORE_OPS_TRAINING_OPS_H_
#define THIRD_PARTY_TENSORFLOW_CORE_OPS_TRAINING_OPS_H_

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/ops/training_ops.h"

namespace tensorflow {

using shape_inference::DimensionHandle;
using shape_inference::InferenceContext;
using shape_inference::ShapeHandle;

static ShapeHandle ShapeOrHandleShape(InferenceContext* c, int input) {
  auto* handle_data = c->input_handle_shapes_and_types(input);
  if (handle_data != nullptr && !handle_data->empty() &&
      (*handle_data)[0].dtype != DT_INVALID) {
    return (*handle_data)[0].shape;
  }
  return c->input(input);
}

// Handle the gradient and, if <sparse>, indices inputs.
// <s> is an input+output parameter, containing the current known input shape to
// the gradient.
static Status HandleGradAndIndicesInputs(InferenceContext* c, bool sparse,
                                         int grad_idx, ShapeHandle* s) {
  ShapeHandle grad = ShapeOrHandleShape(c, grad_idx);
  if (!sparse) {
    TF_RETURN_IF_ERROR(c->Merge(*s, grad, s));
    return Status::OK();
  }
  // Indices is a vector where indices.dim[0].rank == grad[0].rank.
  ShapeHandle indices;
  TF_RETURN_IF_ERROR(c->WithRank(c->input(grad_idx + 1), 1, &indices));
  DimensionHandle unused;
  TF_RETURN_IF_ERROR(c->Merge(c->Dim(indices, 0), c->Dim(grad, 0), &unused));

  // Trailing part of grad matches trailing part of *s.
  ShapeHandle grad_unknown_first;
  TF_RETURN_IF_ERROR(
      c->ReplaceDim(grad, 0, c->UnknownDim(), &grad_unknown_first));
  TF_RETURN_IF_ERROR(c->Merge(*s, grad_unknown_first, s));

  return Status::OK();
}

} // namespace tensorflow

#endif  // THIRD_PARTY_TENSORFLOW_CORE_OPS_TRAINING_OPS_H_
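
The helper above encodes the shape contract shared by all of the sparse training ops: indices must be a vector, its length must equal the first dimension of grad, and the trailing dimensions of grad must match the variable. A NumPy-level sketch of that contract, for illustration only (check_sparse_grad_shapes is a hypothetical helper, not part of the codebase):

import numpy as np

def check_sparse_grad_shapes(var, grad, indices):
  """Illustrative mirror of the constraint enforced by HandleGradAndIndicesInputs."""
  assert indices.ndim == 1, "indices must be a vector"
  assert grad.shape[0] == indices.shape[0], "one gradient slice per index"
  assert grad.shape[1:] == var.shape[1:], "trailing dims of grad must match var"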

@ -65,16 +65,7 @@ tf_java_op_gen_srcjar(
"sparse_ops",
"state_ops",
"string_ops",
"adadelta_ops",
"adagrad_da_ops",
"adagrad_ops",
"adam_ops",
"ftrl_ops",
"momentum_ops",
"gradient_descent_ops",
"proximal_adagrad_ops",
"proximal_gradient_descent_ops",
"rms_prop_ops",
"training_ops",
"user_ops",
],
)

@ -1363,53 +1363,8 @@ tf_gen_op_wrapper_private_py(
)

tf_gen_op_wrapper_private_py(
    name = "adagrad_ops_gen",
    out = "training/gen_adagrad_ops.py",
)

tf_gen_op_wrapper_private_py(
    name = "adagrad_da_ops_gen",
    out = "training/gen_adagrad_da_ops.py",
)

tf_gen_op_wrapper_private_py(
    name = "adadelta_ops_gen",
    out = "training/gen_adadelta_ops.py",
)

tf_gen_op_wrapper_private_py(
    name = "adam_ops_gen",
    out = "training/gen_adam_ops.py",
)

tf_gen_op_wrapper_private_py(
    name = "ftrl_ops_gen",
    out = "training/gen_ftrl_ops.py",
)

tf_gen_op_wrapper_private_py(
    name = "gradient_descent_ops_gen",
    out = "training/gen_gradient_descent_ops.py",
)

tf_gen_op_wrapper_private_py(
    name = "momentum_ops_gen",
    out = "training/gen_momentum_ops.py",
)

tf_gen_op_wrapper_private_py(
    name = "proximal_adagrad_ops_gen",
    out = "training/gen_proximal_adagrad_ops.py",
)

tf_gen_op_wrapper_private_py(
    name = "proximal_gradient_descent_ops_gen",
    out = "training/gen_proximal_gradient_descent_ops.py",
)

tf_gen_op_wrapper_private_py(
    name = "rms_prop_ops_gen",
    out = "training/gen_rms_prop_ops.py",
    name = "training_ops_gen",
    out = "training/gen_training_ops.py",
)

py_library(

@ -2640,10 +2595,6 @@ py_library(
    ),
    srcs_version = "PY2AND3",
    deps = [
        ":adadelta_ops_gen",
        ":adagrad_da_ops_gen",
        ":adagrad_ops_gen",
        ":adam_ops_gen",
        ":array_ops",
        ":checkpoint_ops_gen",
        ":client",

@ -2652,8 +2603,6 @@ py_library(
        ":errors",
        ":framework",
        ":framework_for_generated_wrappers",
        ":ftrl_ops_gen",
        ":gradient_descent_ops_gen",
        ":gradients",
        ":init_ops",
        ":io_ops",

@ -2661,21 +2610,18 @@ py_library(
        ":lib",
        ":lookup_ops",
        ":math_ops",
        ":momentum_ops_gen",
        ":platform",
        ":protos_all_py",
        ":proximal_adagrad_ops_gen",
        ":proximal_gradient_descent_ops_gen",
        ":pywrap_tensorflow",
        ":random_ops",
        ":resource_variable_ops",
        ":resources",
        ":rms_prop_ops_gen",
        ":sdca_ops",
        ":sparse_ops",
        ":state_ops",
        ":string_ops",
        ":summary",
        ":training_ops_gen",
        ":util",
        ":variable_scope",
        ":variables",

@ -19,16 +19,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.python.training import gen_training_ops
# go/tf-wildcard-import
# pylint: disable=wildcard-import
from tensorflow.python.training.gen_adadelta_ops import *
from tensorflow.python.training.gen_adagrad_da_ops import *
from tensorflow.python.training.gen_adagrad_ops import *
from tensorflow.python.training.gen_adam_ops import *
from tensorflow.python.training.gen_ftrl_ops import *
from tensorflow.python.training.gen_gradient_descent_ops import *
from tensorflow.python.training.gen_momentum_ops import *
from tensorflow.python.training.gen_proximal_adagrad_ops import *
from tensorflow.python.training.gen_proximal_gradient_descent_ops import *
from tensorflow.python.training.gen_rms_prop_ops import *
from tensorflow.python.training.gen_training_ops import *
# pylint: enable=wildcard-import

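After the rollback, Python call sites import the single generated module gen_training_ops rather than one generated module per optimizer. A hedged sketch of a call into the consolidated wrapper (apply_gradient_descent is the generated snake_case form of the ApplyGradientDescent op; treat the exact keyword names here as illustrative):

from tensorflow.python.training import gen_training_ops

def sgd_update_op(var, learning_rate, grad):
  """Builds the raw generated op that applies one in-place SGD step to `var`."""
  return gen_training_ops.apply_gradient_descent(
      var, alpha=learning_rate, delta=grad, use_locking=False)
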
@ -196,7 +196,7 @@ def tf_opts_nortti_if_android():

# Given a list of "op_lib_names" (a list of files in the ops directory
# without their .cc extensions), generate a library for that file.
def tf_gen_op_libs(op_lib_names, deps=None, extra_srcs=[]):
def tf_gen_op_libs(op_lib_names, deps=None):
  # Make library out of each op so it can also be used to generate wrappers
  # for various languages.
  if not deps:

@ -205,7 +205,7 @@ def tf_gen_op_libs(op_lib_names, deps=None, extra_srcs=[]):
    native.cc_library(
        name=n + "_op_lib",
        copts=tf_copts(),
        srcs=extra_srcs + ["ops/" + n + ".cc"],
        srcs=["ops/" + n + ".cc"],
        deps=deps + [clean_dep("//tensorflow/core:framework")],
        visibility=["//visibility:public"],
        alwayslink=1,
