diff --git a/CODEOWNERS b/CODEOWNERS index 271e3b5b2ff..3ef02ffd68c 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -13,55 +13,4 @@ /tensorflow/tensorboard/ @jart /tensorflow/tools/docs/ @markdaoust -# contrib - -# NEED OWNER: /tensorflow/contrib/all_reduce -/tensorflow/contrib/autograph/ @mdanatg @kkimdev -/tensorflow/contrib/batching/ @alextp @chrisolston -/tensorflow/contrib/bayesflow/ @ebrevdo @rsepassi @jvdillon -/tensorflow/contrib/boosted_trees/ @sshrdp @yk5 @nataliaponomareva -/tensorflow/contrib/checkpoint/ @allenlavoie -/tensorflow/contrib/contrib/cluster_resolver/ @frankchn -/tensorflow/contrib/cmake/ @mrry -/tensorflow/contrib/copy_graph/ @tucker @poxvoculi -/tensorflow/contrib/crf/ @kentonl -/tensorflow/contrib/data/ @mrry -/tensorflow/tensorflow/contrib/distribute @joshl @priyag @sourabhbajaj @frankchn -/tensorflow/contrib/distributions/ @jvdillon @langmore @rsepassi -/tensorflow/contrib/eager @jaingaurav @alextp -/tensorflow/contrib/factorization/ @agarwal-ashish @xavigonzalvo -/tensorflow/contrib/ffmpeg/ @fredbertsch -/tensorflow/contrib/framework/ @ebrevdo -/tensorflow/contrib/graph_editor/ @purpledog -# NEED OWNER: /tensorflow/contrib/grid_rnn/ -/tensorflow/contrib/hadoop @yongtang -/tensorflow/contrib/hvx/ @satok16 -/tensorflow/contrib/integrate/ @shoyer -/tensorflow/contrib/kernel_methods/ @petrosmol -/tensorflow/contrib/ios_examples/ @petewarden -/tensorflow/contrib/labeled_tensor/ @shoyer -/tensorflow/contrib/layers/ @fchollet @martinwicke -/tensorflow/contrib/learn/ @martinwicke @ispirmustafa @alextp -/tensorflow/contrib/linear_optimizer/ @petrosmol @andreasst @katsiapis -/tensorflow/contrib/lookup/ @ysuematsu @andreasst -/tensorflow/contrib/losses/ @alextp @ispirmustafa -/tensorflow/contrib/makefile/ @petewarden @satok16 @wolffg -/tensorflow/contrib/metrics/ @alextp @honkentuber @ispirmustafa -/tensorflow/contrib/opt/ @strategist333 @alextp -/tensorflow/contrib/pi_examples/ @maciekcc -/tensorflow/contrib/quantization/ @petewarden -/tensorflow/contrib/rnn/ @ebrevdo @scottzhu -/tensorflow/contrib/saved_model/ @nfiedel @sukritiramesh @allenlavoie -/tensorflow/contrib/seq2seq/ @ebrevdo @lmthang -/tensorflow/contrib/session_bundle/ @nfiedel @sukritiramesh -/tensorflow/contrib/slim/ @sguada @thenbasilmanran -/tensorflow/contrib/stateless/ @girving @alextp -/tensorflow/contrib/tensor_forest/ @gilberthendry @thomascolthurst @yupbank -/tensorflow/contrib/tensorrt/ @aaroey @smit-hinsu @azaks2 -# NEED OWNER: /tensorflow/contrib/testing/ -/tensorflow/contrib/timeseries/ @allenlavoie -/tensorflow/contrib/tpu/ @frankchn @saeta @jhseu @sourabhbajaj -/tensorflow/contrib/training/ @joel-shor @ebrevdo -/tensorflow/contrib/util/ @sherrym - /third_party/systemlibs/ @perfinion diff --git a/README.md b/README.md index 51ca43e1571..05b1e4de458 100644 --- a/README.md +++ b/README.md @@ -110,19 +110,19 @@ Build Type | Status ### Community Supported Builds -Build Type | Status | Artifacts -------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- -**Linux AMD ROCm GPU** Nightly | [![Build Status](http://ml-ci.amd.com:21096/job/tensorflow-rocm-nightly/badge/icon)](http://ml-ci.amd.com:21096/job/tensorflow-rocm-nightly) | [Nightly](http://ml-ci.amd.com:21096/job/tensorflow-rocm-nightly/lastSuccessfulBuild/) -**Linux AMD ROCm GPU** Stable Release | [![Build 
Status](http://ml-ci.amd.com:21096/job/tensorflow-rocm-release/badge/icon)](http://ml-ci.amd.com:21096/job/tensorflow-rocm-release/) | Release [1.15](http://ml-ci.amd.com:21096/job/tensorflow-rocm-release/lastSuccessfulBuild/) / [2.x](http://ml-ci.amd.com:21096/job/tensorflow-rocm-v2-release/lastSuccessfulBuild/) -**Linux s390x** Nightly | [![Build Status](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/badge/icon)](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/) | [Nightly](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/) -**Linux s390x CPU** Stable Release | [![Build Status](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_Release_Build/badge/icon)](https://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_Release_Build/) | [Release](https://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_Release_Build/) -**Linux ppc64le CPU** Nightly | [![Build Status](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Build/badge/icon)](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Build/) | [Nightly](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Nightly_Artifact/) -**Linux ppc64le CPU** Stable Release | [![Build Status](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Release_Build/badge/icon)](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Release_Build/) | Release [1.15](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Release_Build/) / [2.x](https://powerci.osuosl.org/job/TensorFlow2_PPC64LE_CPU_Release_Build/) -**Linux ppc64le GPU** Nightly | [![Build Status](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Build/badge/icon)](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Build/) | [Nightly](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/) -**Linux ppc64le GPU** Stable Release | [![Build Status](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/badge/icon)](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/) | Release [1.15](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/) / [2.x](https://powerci.osuosl.org/job/TensorFlow2_PPC64LE_GPU_Release_Build/) -**Linux CPU with Intel® MKL-DNN** Nightly | [![Build Status](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/badge/icon)](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/) | [Nightly](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-whl-nightly/) -**Linux CPU with Intel® MKL-DNN**
**Supports Python 2.7, 3.4, 3.5, 3.6 and 3.7** | [![Build Status](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/badge/icon)](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/lastStableBuild) | [1.14.0 PyPI](https://pypi.org/project/intel-tensorflow/) -**Red Hat® Enterprise Linux® 7.6 CPU & GPU** <br>
Python 2.7, 3.6 | [![Build Status](https://jenkins-tensorflow.apps.ci.centos.org/buildStatus/icon?job=tensorflow-rhel7-3.6&build=2)](https://jenkins-tensorflow.apps.ci.centos.org/job/tensorflow-rhel7-3.6/2/) | [1.13.1 PyPI](https://tensorflow.pypi.thoth-station.ninja/index/) +Build Type | Status | Artifacts +----------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- +**Linux AMD ROCm GPU** Nightly | [![Build Status](http://ml-ci.amd.com:21096/job/tensorflow-rocm-nightly/badge/icon)](http://ml-ci.amd.com:21096/job/tensorflow-rocm-nightly) | [Nightly](http://ml-ci.amd.com:21096/job/tensorflow-rocm-nightly/lastSuccessfulBuild/) +**Linux AMD ROCm GPU** Stable Release | [![Build Status](http://ml-ci.amd.com:21096/job/tensorflow-rocm-release/badge/icon)](http://ml-ci.amd.com:21096/job/tensorflow-rocm-release/) | Release [1.15](http://ml-ci.amd.com:21096/job/tensorflow-rocm-release/lastSuccessfulBuild/) / [2.x](http://ml-ci.amd.com:21096/job/tensorflow-rocm-v2-release/lastSuccessfulBuild/) +**Linux s390x** Nightly | [![Build Status](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/badge/icon)](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/) | [Nightly](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/) +**Linux s390x CPU** Stable Release | [![Build Status](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_Release_Build/badge/icon)](https://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_Release_Build/) | [Release](https://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_Release_Build/) +**Linux ppc64le CPU** Nightly | [![Build Status](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Build/badge/icon)](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Build/) | [Nightly](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Nightly_Artifact/) +**Linux ppc64le CPU** Stable Release | [![Build Status](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Release_Build/badge/icon)](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Release_Build/) | Release [1.15](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Release_Build/) / [2.x](https://powerci.osuosl.org/job/TensorFlow2_PPC64LE_CPU_Release_Build/) +**Linux ppc64le GPU** Nightly | [![Build Status](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Build/badge/icon)](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Build/) | [Nightly](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/) +**Linux ppc64le GPU** Stable Release | [![Build Status](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/badge/icon)](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/) | Release [1.15](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/) / [2.x](https://powerci.osuosl.org/job/TensorFlow2_PPC64LE_GPU_Release_Build/) +**Linux CPU with Intel® MKL-DNN** Nightly | [![Build Status](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-whl-nightly/badge/icon)](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-whl-nightly/) | [Nightly](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-whl-nightly/) +**Linux CPU with Intel® MKL-DNN** Stable Release | ![Build Status](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/badge/icon) | Release [1.15](https://pypi.org/project/intel-tensorflow/1.15.0/) / [2.x](https://pypi.org/project/intel-tensorflow/) 
+**Red Hat® Enterprise Linux® 7.6 CPU & GPU** <br>
Python 2.7, 3.6 | [![Build Status](https://jenkins-tensorflow.apps.ci.centos.org/buildStatus/icon?job=tensorflow-rhel7-3.6&build=2)](https://jenkins-tensorflow.apps.ci.centos.org/job/tensorflow-rhel7-3.6/2/) | [1.13.1 PyPI](https://tensorflow.pypi.thoth-station.ninja/index/) ## Resources diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 0f299ec13f8..603c2a5c45c 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -195,6 +195,12 @@ config_setting( visibility = ["//visibility:public"], ) +config_setting( + name = "chromiumos", + values = {"crosstool_top": "//external:android/chromiumos"}, + visibility = ["//visibility:public"], +) + config_setting( name = "linux_aarch64", values = {"cpu": "aarch64"}, @@ -453,6 +459,7 @@ package_group( "//tensorflow_estimator/python/estimator/...", "//tensorflow_models/official/...", "//third_party/py/autograph/...", + "//third_party/swift/tensorflow/x10/...", ], ) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 46ade1b2e77..8793e308466 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -233,7 +233,7 @@ tensorflow::Status GetReplacedFromExistingWorkers( std::vector responses( existing_workers->size()); for (int i = 0; i < existing_workers->size(); i++) { - tensorflow::eager::EagerClient* eager_client; + tensorflow::core::RefCountPtr eager_client; statuses[i] = client_cache->GetClient(existing_workers->at(i), &eager_client); if (!statuses[i].ok()) { @@ -282,7 +282,7 @@ tensorflow::Status CreateRemoteContexts( continue; } - tensorflow::eager::EagerClient* eager_client; + tensorflow::core::RefCountPtr eager_client; statuses[i] = remote_eager_workers->GetClient(remote_worker, &eager_client); if (eager_client == nullptr) { statuses[i] = tensorflow::errors::Internal( @@ -340,7 +340,7 @@ tensorflow::Status UpdateRemoteContexts( continue; } - tensorflow::eager::EagerClient* eager_client; + tensorflow::core::RefCountPtr eager_client; statuses[i] = remote_eager_workers->GetClient(remote_worker, &eager_client); if (eager_client == nullptr) { statuses[i] = tensorflow::errors::Internal( @@ -819,7 +819,7 @@ TF_CAPI_EXPORT extern bool TFE_ContextCheckAlive(TFE_Context* ctx, } // TODO(yuefengz): support partially specified `worker_name`. 
- tensorflow::eager::EagerClient* eager_client; + tensorflow::core::RefCountPtr eager_client; status->status = remote_eager_workers->GetClient(worker_name, &eager_client); if (!status->status.ok()) { return false; diff --git a/tensorflow/compiler/mlir/lite/tests/end2end/custom_opdef.pbtxt b/tensorflow/compiler/mlir/lite/tests/end2end/custom_opdef.pbtxt index 7036ef71b58..0fcee7d7e8f 100644 --- a/tensorflow/compiler/mlir/lite/tests/end2end/custom_opdef.pbtxt +++ b/tensorflow/compiler/mlir/lite/tests/end2end/custom_opdef.pbtxt @@ -38,6 +38,6 @@ versions { # CHECK: func @main(%arg0: tensor<4xi32>, %arg1: tensor<4xi32>) -> tensor<*xi32> # CHECK: attributes {tf.entry_function = {inputs = "input0,input1", outputs = "output"}} { -# CHECK-NEXT: %0 = "tf.BannaPotatoSaladWithColeslaw"(%arg0, %arg1) {T = "tfdtype$DT_INT32", device = "", name = "output"} : (tensor<4xi32>, tensor<4xi32>) -> tensor<*xi32> +# CHECK-NEXT: %0 = "tf.BannaPotatoSaladWithColeslaw"(%arg0, %arg1) {T = i32, device = "", name = "output"} : (tensor<4xi32>, tensor<4xi32>) -> tensor<*xi32> # CHECK-NEXT: return %0 : tensor<*xi32> # CHECK-NEXT: } diff --git a/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir index 27eff39c397..ec618ffa276 100644 --- a/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/lite/tests/legalize-tf.mlir @@ -1280,3 +1280,13 @@ func @conv2d_backprop_unsupported_data_format(%arg0: tensor<4xi32>, %arg1: tenso // CHECK-LABEL: conv2d_backprop_unsupported_data_format // CHECK: tf.Conv2DBackpropInput } + +func @assert_remove(%arg0: tensor<1xi32>, %arg1: tensor<1xi32>) -> tensor<1xi1> { + %0 = "tf.LessEqual"(%arg0, %arg1) : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi1> + "tf.Assert"(%0, %arg1) {summarize = 3} : (tensor<1xi1>, tensor<1xi32>) -> () + return %0 : tensor<1xi1> + // CHECK-LABEL: assert_remove + // CHECK: tfl.less_equal + // CHECK-NOT: Assert + // CHECK: return +} diff --git a/tensorflow/compiler/mlir/lite/tests/optimize.mlir b/tensorflow/compiler/mlir/lite/tests/optimize.mlir index f7913f11f72..1d51adb16f2 100644 --- a/tensorflow/compiler/mlir/lite/tests/optimize.mlir +++ b/tensorflow/compiler/mlir/lite/tests/optimize.mlir @@ -622,3 +622,12 @@ func @Relu1_2(%arg0: tensor<2x3xf32>) -> tensor<2x3xf32> { // CHECK: %[[relu_n1_to_1:[0-9].*]] = "tfl.relu_n1_to_1" } + +// CHECK-LABEL: fuse_relu_to_add +func @fuse_relu_to_add(%arg0: tensor<2x3xf32>, %arg1: tensor<2x3xf32>) -> tensor<2x3xf32> { + %0 = "tfl.add"(%arg0, %arg1) {fused_activation_function = "NONE"} : (tensor<2x3xf32>, tensor<2x3xf32>) -> tensor<2x3xf32> + %1 = "tfl.relu_n1_to_1"(%0) : (tensor<2x3xf32>) -> tensor<2x3xf32> + return %1 : tensor<2x3xf32> + // CHECK: %[[RES:.*]] = tfl.add %arg0, %arg1 {fused_activation_function = "RELU_N1_TO_1"} + // CHECK: return %[[RES]] +} diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc index bc6ff5e3b47..0512bc98cab 100644 --- a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc @@ -68,6 +68,7 @@ struct LegalizeTF : public FunctionPass { // TODO(antiagainst): Define this pattern in a table-driven manner once variadic // operands are properly supported in declarative rewrite rule specification. 
+DECL_CONVERT_OP(Assert); DECL_CONVERT_OP(Concat); DECL_CONVERT_OP(ConcatV2); DECL_CONVERT_OP(MatMul); @@ -86,7 +87,7 @@ PatternMatchResult ConvertTFConcatOp::matchAndRewrite( Operation* op, PatternRewriter& rewriter) const { auto tf_concat_op = cast(op); - SmallVector values(tf_concat_op.values()); + auto values = tf_concat_op.values(); auto output_type = tf_concat_op.output()->getType(); // Extract axis attribute from constant concat_dims tensor ElementsAttr axis; @@ -105,7 +106,7 @@ PatternMatchResult ConvertTFConcatV2Op::matchAndRewrite( Operation* op, PatternRewriter& rewriter) const { auto tf_concat_op = cast(op); - SmallVector values(tf_concat_op.values()); + auto values = tf_concat_op.values(); auto output_type = tf_concat_op.output()->getType(); // Extract axis attribute from constant axis tensor ElementsAttr axis; @@ -374,6 +375,14 @@ PatternMatchResult ConvertTFMatrixDiagV3Op::matchAndRewrite( return matchFailure(); } +// TF Lite doesn't support Assert, we just drop the assert from the graph. +PatternMatchResult ConvertTFAssertOp::matchAndRewrite( + Operation* op, PatternRewriter& rewriter) const { + op->dropAllReferences(); + op->erase(); + return matchSuccess(); +} + void LegalizeTF::runOnFunction() { OwningRewritePatternList patterns; auto* ctx = &getContext(); @@ -385,7 +394,8 @@ void LegalizeTF::runOnFunction() { .insert(ctx); + ConvertTFStridedSliceOp, ConvertTFUnpackOp, ConvertTFAssertOp>( + ctx); applyPatternsGreedily(func, patterns); } diff --git a/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc b/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc index bf0e7169584..3f50c3ad1c1 100644 --- a/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc +++ b/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc @@ -484,9 +484,9 @@ struct ConvertTensorListResize : public ConversionPattern { &rewriter); // Inserts the two blocks' names into the symbol table held by the module. - // Using ModuleManager will ensure that the inserted symbol names are + // Using SymbolTable will ensure that the inserted symbol names are // unique. - ModuleManager manager(resize_op.getParentOfType()); + SymbolTable manager(resize_op.getParentOfType()); manager.insert(then_branch_op); manager.insert(else_branch_op); @@ -754,8 +754,7 @@ struct ConvertWhile : public ConversionPattern { cloned.removeAttr("T"); UpdateFunctionTypes(cloned); - SmallVector results(cloned.getResults()); - rewriter.replaceOp(op, results); + rewriter.replaceOp(op, cloned.getResults()); return matchSuccess(); } }; diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize_functional_ops.cc b/tensorflow/compiler/mlir/lite/transforms/optimize_functional_ops.cc index c8b54d26653..173785ba5b0 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize_functional_ops.cc +++ b/tensorflow/compiler/mlir/lite/transforms/optimize_functional_ops.cc @@ -135,15 +135,15 @@ class FoldIfOp : public OpRewritePattern { static void EraseDeadFuncs(const FuncSet& candiate_funcs, ModuleOp module) { if (candiate_funcs.empty()) return; - ModuleManager manager(module); + SymbolTable manager(module); // Identify the functions that are used as symbols in the module and shouldn't // be erased. 
FuncSet in_use_funcs; - manager.getModule().walk([&](Operation* op) { + manager.getOp()->walk([&](Operation* op) { for (auto attr : op->getAttrs()) { if (auto symbol = attr.second.dyn_cast()) { - auto func = manager.lookupSymbol(symbol.getValue()); + auto func = manager.lookup(symbol.getValue()); in_use_funcs.insert(func); } } diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td b/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td index 905f01d8413..a91f6de1971 100644 --- a/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td +++ b/tensorflow/compiler/mlir/lite/transforms/optimize_patterns.td @@ -44,12 +44,13 @@ multiclass FuseActFnIntoConvOpPat { $multiplier)>; } -// TODO(hinsu): Also fuse ops corresponding to RELU_N1_TO_1 and SIGN_BIT fused +// TODO(hinsu): Also fuse ops corresponding to SIGN_BIT fused // activation functions. // Currently we're not fusing tanh, sigmoid, hard_swish and other activations // those cannot be simply translated into clamping. foreach actFnPair = [[TFL_ReluOp, TFL_AF_Relu], - [TFL_Relu6Op, TFL_AF_Relu6]] in + [TFL_Relu6Op, TFL_AF_Relu6], + [TFL_Relu1Op, TFL_AF_Relu1]] in defm : FuseActFnIntoConvOpPat; @@ -291,3 +292,18 @@ def : Pat<(TFL_MaximumOp (TFL_MinimumOp $input, (ConstantOp $NegOne)), (TFL_Relu1Op $input), [(ValueEquals<"-1"> $NegOne), (ValueEquals<"1"> $One)]>; + +// Multi-pattern consisting of matching stand-alone op or op followed by relu. +multiclass FusedBinaryActivationFuncOpPat { + foreach actFnPair = [[TFL_ReluOp, TFL_AF_Relu], + [TFL_Relu6Op, TFL_AF_Relu6], + [TFL_Relu1Op, TFL_AF_Relu1]] in { + def : Pat<(actFnPair[0] (BinaryOp $lhs, $rhs, TFL_AF_None)), + (BinaryOp $lhs, $rhs, actFnPair[1])>; + } +} + +// Instantiated FusedBinary patterns for the from-to pairs of ops. 
+foreach BinaryOps = [TFL_AddOp, TFL_DivOp, + TFL_MulOp, TFL_SubOp] in + defm : FusedBinaryActivationFuncOpPat; diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD index 5484988d0f5..5f93210f06e 100644 --- a/tensorflow/compiler/mlir/tensorflow/BUILD +++ b/tensorflow/compiler/mlir/tensorflow/BUILD @@ -192,6 +192,37 @@ cc_library( alwayslink = 1, ) +gentbl( + name = "decompose_resource_ops_inc_gen", + tbl_outs = [ + ( + "-gen-rewriters", + "transforms/generated_decompose_resource_ops.inc", + ), + ], + tblgen = "@local_config_mlir//:mlir-tblgen", + td_file = "transforms/decompose_resource_ops.td", + td_srcs = [ + ":tensorflow_ops_td_files", + "@local_config_mlir//:StdOpsTdFiles", + ], +) + +cc_library( + name = "decompose_resource_ops", + srcs = [ + "transforms/decompose_resource_ops.cc", + ], + hdrs = [ + "transforms/decompose_resource_ops.h", + ], + deps = [ + ":decompose_resource_ops_inc_gen", + ":tensorflow", + "@local_config_mlir//:IR", + ], +) + cc_library( name = "tensorflow_passes", srcs = [ @@ -199,6 +230,7 @@ cc_library( "transforms/bridge_pass.cc", "transforms/cluster_formation.cc", "transforms/cluster_outlining.cc", + "transforms/decompose_resource_ops_pass.cc", "transforms/delete_unused_funcs.cc", "transforms/executor_island_coarsening.cc", "transforms/fold_switch.cc", @@ -213,6 +245,7 @@ cc_library( "transforms/raise_control_flow.cc", "transforms/replicate_invariant_op_hoisting.cc", "transforms/replicate_to_island.cc", + "transforms/resource_device_inference.cc", "transforms/resource_op_lifting.cc", "transforms/shape_inference.cc", "transforms/shape_inference_pass.cc", @@ -236,6 +269,8 @@ cc_library( ":bridge_logger", ":convert_tensor", ":convert_type", + ":decompose_resource_ops", + ":decompose_resource_ops_inc_gen", ":device_util", ":error_util", ":export_tf_dialect_op", @@ -368,12 +403,14 @@ cc_library( ":convert_tensor", ":convert_type", ":mangling_util", + ":tensorflow", "//tensorflow/compiler/xla:status_macros", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:graph", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", + "//tensorflow/core/platform:protobuf", "//tensorflow/stream_executor/lib", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/memory", @@ -564,7 +601,6 @@ cc_library( hdrs = ["utils/error_util.h"], deps = [ "//tensorflow/core:lib", - "//tensorflow/stream_executor/lib", "@llvm//:support", "@local_config_mlir//:IR", ], @@ -808,7 +844,6 @@ cc_library( "//tensorflow/core:framework", "//tensorflow/core/platform:logging", "//tensorflow/stream_executor/lib", - "@com_google_absl//absl/types:span", "@llvm//:support", "@local_config_mlir//:IR", "@local_config_mlir//:Parser", diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc index 8d43c9330d0..898393479b0 100644 --- a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc +++ b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc @@ -34,6 +34,7 @@ limitations under the License. 
#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir #include "mlir/Support/LLVM.h" // TF:local_config_mlir #include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" #include "tensorflow/compiler/tf2xla/resource_operation_table.h" @@ -99,12 +100,13 @@ void ResourceAliasAnalysis::AnalyzeFunction(FuncOp func_op) { auto forward_input_to_output = [&](Value* operand, Value* result) { if (!mlir::getElementTypeOrSelf(result->getType()).isa()) return; + auto& result_ids = resource_value_to_ids_[result]; auto operand_it = resource_value_to_ids_.find(operand); assert(operand_it != resource_value_to_ids_.end() && "A resource-type output does not have the corresponding " "resource-type input."); - resource_value_to_ids_[result].insert(operand_it->getSecond().begin(), - operand_it->getSecond().end()); + result_ids.insert(operand_it->getSecond().begin(), + operand_it->getSecond().end()); }; // TODO(yuanzx): Consider control-flow ops. func_op.walk([&](Operation* op) { @@ -119,6 +121,16 @@ void ResourceAliasAnalysis::AnalyzeFunction(FuncOp func_op) { forward_input_to_output(std::get<0>(operand_and_result), std::get<1>(operand_and_result)); } + } else if (auto replicate = llvm::dyn_cast(op)) { + // The nested block for RepliateOp is handled separately in side-effect + // analysis. Inside that block, we can still treat its block arguments as + // different resources. + for (auto arg : replicate.GetBody().getArguments()) { + if (mlir::getElementTypeOrSelf(arg->getType()) + .isa()) { + resource_value_to_ids_[arg].insert(next_unique_id++); + } + } } else { for (auto result : op->getResults()) { if (!mlir::getElementTypeOrSelf(result->getType()) @@ -261,9 +273,36 @@ void SideEffectAnalysis::AddPredecessorsForAccess(int64_t resource_id, void SideEffectAnalysis::AnalyzeFunction( FuncOp func_op, const ResourceAliasAnalysis& alias_analysis) { - // This function populates control_predecessors_ and control_successors_ by - // walking through func_op's body, and tracking resource accesses in - // per_resource_access_info_. + // AnalyzeRegion() recursively analyzes the function body, and only populates + // control_predecessors_. + AnalyzeRegion(&func_op.getBody(), alias_analysis); + // Populate sorted_control_predecessors_ and sorted_control_successors_ based + // on control_predecessors. + for (auto& entry : control_predecessors_) { + auto op = entry.getFirst(); + auto& sorted_predecessors = sorted_control_predecessors_[op]; + for (auto predecessor : entry.getSecond()) { + sorted_predecessors.push_back(predecessor); + sorted_control_successors_[predecessor].push_back(op); + } + } + control_predecessors_.clear(); + for (auto& entry : sorted_control_predecessors_) { + llvm::sort(entry.getSecond(), [](Operation* a, Operation* b) { + return a->isBeforeInBlock(b); + }); + } + for (auto& entry : sorted_control_successors_) { + llvm::sort(entry.getSecond(), [](Operation* a, Operation* b) { + return a->isBeforeInBlock(b); + }); + } +} + +void SideEffectAnalysis::AnalyzeRegion( + Region* region, const ResourceAliasAnalysis& alias_analysis) { + // This function populates control_predecessors_ by walking through the + // region, and tracking resource accesses in per_resource_access_info_. 
// Returns whether an access to `resource` can skip control edges from // prevoius accesses to unknown resources, due to that earlier accesses to @@ -284,82 +323,93 @@ void SideEffectAnalysis::AnalyzeFunction( (it->second.tracked_last_unknown_read || no_unknown_read); }; - func_op.walk([&](Operation* op) { - // We do not need explicit control edges for declaration ops. - if (OpIsDeclaration(op, alias_analysis)) return; - - auto resource_op_info = GetResourceInfoForOp(op); - if (!resource_op_info && op->hasNoSideEffect()) return; - - llvm::SmallDenseSet resources = - resource_op_info ? FindAccessedResources(op, alias_analysis) - : UnknownResourceSet(); - assert(!resources.empty()); - const bool is_unknown = resources.count(kUnknownResourceId) > 0; - const bool read_only = OpIsReadOnly(op); - bool indirectly_tracked_unknown_access = false; - // First add edges from known resources. - if (is_unknown) { - for (auto& entry : per_resource_access_info_) { - if (entry.getFirst() == kUnknownResourceId) continue; - AddPredecessorsForAccess(entry.getFirst(), op, read_only); - indirectly_tracked_unknown_access |= - unknown_access_indirectly_tracked_by_resource(entry.getFirst(), - read_only); + // We explicitly iterates through the regions and blocks, in order to handle + // different nested regions separately. + for (auto& block : *region) { + for (auto& op : block) { + if (op.getNumRegions() > 0) { + llvm::SmallVector child_analyses; + for (auto& child_region : op.getRegions()) { + child_analyses.emplace_back(); + child_analyses.back().AnalyzeRegion(&child_region, alias_analysis); + } + ConsumeChildAnalyses(std::move(child_analyses)); } - } else { - for (int64_t resource : resources) { - AddPredecessorsForAccess(resource, op, read_only); - indirectly_tracked_unknown_access |= - unknown_access_indirectly_tracked_by_resource(resource, read_only); - // Update access info for known resources. - TrackAccess(resource, op, read_only); - } - } - // If not indirectly tracked, add edges from the unknown resource. - if (!indirectly_tracked_unknown_access) { - AddPredecessorsForAccess(kUnknownResourceId, op, read_only); - } - if (is_unknown) { - // Update access info for unknown resource. - TrackAccess(kUnknownResourceId, op, read_only); - } - }); - // Populate control_successors_ based on control_predecessors_. - for (auto& entry : control_predecessors_) { - auto op = entry.getFirst(); - for (auto predecessor : entry.getSecond()) { - control_successors_[predecessor].insert(op); + // We do not need explicit control edges for declaration ops. + if (OpIsDeclaration(&op, alias_analysis)) continue; + + auto resource_op_info = GetResourceInfoForOp(&op); + if (!resource_op_info && op.hasNoSideEffect()) continue; + + llvm::SmallDenseSet resources = + resource_op_info ? FindAccessedResources(&op, alias_analysis) + : UnknownResourceSet(); + assert(!resources.empty()); + const bool is_unknown = resources.count(kUnknownResourceId) > 0; + const bool read_only = OpIsReadOnly(&op); + bool indirectly_tracked_unknown_access = false; + // First add edges from known resources. 
+ if (is_unknown) { + for (auto& entry : per_resource_access_info_) { + if (entry.getFirst() == kUnknownResourceId) continue; + AddPredecessorsForAccess(entry.getFirst(), &op, read_only); + indirectly_tracked_unknown_access |= + unknown_access_indirectly_tracked_by_resource(entry.getFirst(), + read_only); + } + } else { + for (int64_t resource : resources) { + AddPredecessorsForAccess(resource, &op, read_only); + indirectly_tracked_unknown_access |= + unknown_access_indirectly_tracked_by_resource(resource, + read_only); + // Update access info for known resources. + TrackAccess(resource, &op, read_only); + } + } + // If not indirectly tracked, add edges from the unknown resource. + if (!indirectly_tracked_unknown_access) { + AddPredecessorsForAccess(kUnknownResourceId, &op, read_only); + } + if (is_unknown) { + // Update access info for unknown resource. + TrackAccess(kUnknownResourceId, &op, read_only); + } } } } -llvm::SmallVector SideEffectAnalysis::DirectControlPredecessors( +void SideEffectAnalysis::ConsumeChildAnalyses( + llvm::SmallVector&& children) { + for (auto& child : children) { + for (auto& entry : child.control_predecessors_) { + control_predecessors_[entry.getFirst()] = std::move(entry.getSecond()); + } + } +} + +llvm::SmallVector SideEffectAnalysis::DirectControlPredecessors( Operation* op, llvm::function_ref filter) const { - llvm::SmallVector result; - auto it = control_predecessors_.find(op); - if (it == control_predecessors_.end()) return result; + llvm::SmallVector result; + auto it = sorted_control_predecessors_.find(op); + if (it == sorted_control_predecessors_.end()) return result; result.reserve(it->getSecond().size()); for (auto predecessor : it->getSecond()) { if (!filter || filter(predecessor)) result.push_back(predecessor); } - llvm::sort(result, - [](Operation* a, Operation* b) { return a->isBeforeInBlock(b); }); return result; } -llvm::SmallVector SideEffectAnalysis::DirectControlSuccessors( +llvm::SmallVector SideEffectAnalysis::DirectControlSuccessors( Operation* op, llvm::function_ref filter) const { - llvm::SmallVector result; - auto it = control_successors_.find(op); - if (it == control_successors_.end()) return result; + llvm::SmallVector result; + auto it = sorted_control_successors_.find(op); + if (it == sorted_control_successors_.end()) return result; result.reserve(it->getSecond().size()); for (auto successor : it->getSecond()) { if (!filter || filter(successor)) result.push_back(successor); } - llvm::sort(result, - [](Operation* a, Operation* b) { return a->isBeforeInBlock(b); }); return result; } diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h index 5eee28a6ae0..3d65217db27 100644 --- a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h +++ b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h @@ -32,6 +32,9 @@ namespace TF { // An analysis that runs on a function and maps each resource-type value to a // set of unique int64_t IDs representing the possible resources it could alias. +// +// If there are nested regions, each region is handled separately. This means +// cross-region aliasing cannot be checked by this analysis. class ResourceAliasAnalysis { public: explicit ResourceAliasAnalysis(Operation* op); @@ -63,8 +66,12 @@ class ResourceAliasAnalysis { // interfering with all known resource op accesses. 
It distinguishes accesses // based on whether they are read-only, and read-only ops do not interfer with // each other. +// +// If there are nested regions, each region is handled separately, and control +// dependencies are only tracked for ops under the same parent op. class SideEffectAnalysis { public: + explicit SideEffectAnalysis() = default; explicit SideEffectAnalysis(Operation* op); SideEffectAnalysis(SideEffectAnalysis&& other) = default; ~SideEffectAnalysis() = default; @@ -72,23 +79,32 @@ class SideEffectAnalysis { // Returns a vector of ops that are direct control predecessors of `op`, // sorted in program order. If `filter` is provided, only predecessors that // pass the filter (returning true) will be included. - llvm::SmallVector DirectControlPredecessors( + llvm::SmallVector DirectControlPredecessors( Operation* op, llvm::function_ref filter = nullptr) const; // Returns a vector of ops that are direct control successors of `op`, sorted // in program order. If `filter` is provided, only successors that pass the // filter (returning true) will be included. - llvm::SmallVector DirectControlSuccessors( + llvm::SmallVector DirectControlSuccessors( Operation* op, llvm::function_ref filter = nullptr) const; private: - // Runs the analysis on `func_op` and populates control_predecessors_ and - // control_successors_. + // Runs the analysis on `func_op` and populates sorted_control_predecessors_ + // and sorted_control_successors_. void AnalyzeFunction(FuncOp func_op, const ResourceAliasAnalysis& alias_analysis); + // Runs the analysis on `region` and populates control_predecessors_. + void AnalyzeRegion(Region* region, + const ResourceAliasAnalysis& alias_analysis); + + // Moves the control_predecessors_ fields in `children` analyses to this + // current analysis. + void ConsumeChildAnalyses( + llvm::SmallVector&& children); + // Updates control_predecessors_ for `op` that is being visted, on the given // `resource_id`. void AddPredecessorsForAccess(int64_t resource_id, Operation* op, @@ -98,11 +114,14 @@ class SideEffectAnalysis { void TrackAccess(int64_t resource_id, Operation* op, bool read_only); // Maps from an op to its control predecessors. - llvm::SmallDenseMap, 8> + llvm::SmallDenseMap, 8> control_predecessors_; - // Maps from an op to its control successors. - llvm::SmallDenseMap, 8> - control_successors_; + // Maps from an op to its control predecessors sorted in program order. + llvm::SmallDenseMap, 8> + sorted_control_predecessors_; + // Maps from an op to its control successors sorted in program order. + llvm::SmallDenseMap, 8> + sorted_control_successors_; // Internal per-resource data structure when we build the dependencies. struct PerResourceAcessInfo { diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc index 20483691a92..ffba86e78ff 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc @@ -332,8 +332,7 @@ struct DropEmptyLaunch : public OpRewritePattern { if (&block.front() != &block.back()) return matchFailure(); // Map launch results to return operands. 
- llvm::SmallVector new_rets(block.front().getOperands()); - rewriter.replaceOp(op, new_rets); + rewriter.replaceOp(op, block.front().getOperands()); return matchSuccess(); } diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc index d2174255a05..5a018a39fd7 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc @@ -408,8 +408,7 @@ ParseResult ParseIslandOp(OpAsmParser &parser, OperationState &result) { if (!wrapped_op) return failure(); OpBuilder builder(parser.getBuilder().getContext()); builder.setInsertionPointToEnd(&block); - builder.create(wrapped_op->getLoc(), - llvm::to_vector<8>(wrapped_op->getResults())); + builder.create(wrapped_op->getLoc(), wrapped_op->getResults()); result.location = wrapped_op->getLoc(); } else if (parser.parseRegion(body, llvm::None, llvm::None)) { return failure(); @@ -1065,8 +1064,7 @@ struct DropEmptyGraph : public OpRewritePattern { if (&block.front() != &block.back()) return matchFailure(); // Map graph results to fetch operands. - llvm::SmallVector new_rets(op.GetFetch().fetches()); - rewriter.replaceOp(op, new_rets); + rewriter.replaceOp(op, op.GetFetch().fetches()); return matchSuccess(); } diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td index cdc545d5681..5b5c028c89d 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td @@ -94,6 +94,8 @@ Inputs must be of same size and shape. TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; TF_DerivedOperandSizeAttr N = TF_DerivedOperandSizeAttr<0>; + + let hasFolder = 1; } def TF_AddV2Op : TF_Op<"AddV2", [Broadcastable, Commutative, NoSideEffect]>, @@ -143,6 +145,8 @@ retained with length 1. ); TF_DerivedOperandTypeAttr Tidx = TF_DerivedOperandTypeAttr<1>; + + let verifier = [{ return Verify(*this); }]; } def TF_AnyOp : TF_Op<"Any", [NoSideEffect]> { @@ -169,6 +173,8 @@ retained with length 1. ); TF_DerivedOperandTypeAttr Tidx = TF_DerivedOperandTypeAttr<1>; + + let verifier = [{ return Verify(*this); }]; } def TF_ArgMaxOp : TF_Op<"ArgMax", [NoSideEffect]> { @@ -2116,6 +2122,28 @@ tf.math.greater_equal(x, y) ==> [True, False, True, True] TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } +def TF_HashTableV2Op : TF_Op<"HashTableV2", []> { + let summary = "Creates a non-initialized hash table."; + + let description = [{ +This op creates a hash table, specifying the type of its keys and values. +Before using the table you will have to initialize it. After initialization the +table will be immutable. 
+ }]; + + let arguments = (ins + StrAttr:$container, + StrAttr:$shared_name, + DefaultValuedAttr:$use_node_name_sharing, + TypeAttr:$key_dtype, + TypeAttr:$value_dtype + ); + + let results = (outs + TF_ResourceTensor:$table_handle + ); +} + def TF_IdentityNOp : TF_Op<"IdentityN", [NoSideEffect]> { let summary = [{ Returns a list of tensors with the same shapes and contents as the input @@ -2473,7 +2501,7 @@ def TF_LogicalAndOp : TF_Op<"LogicalAnd", [Broadcastable, Commutative, NoSideEff } def TF_LogicalNotOp : TF_Op<"LogicalNot", [NoSideEffect, SameOperandsAndResultType]> { - let summary = "Returns the truth value of NOT x element-wise."; + let summary = "Returns the truth value of `NOT x` element-wise."; let description = [{ }]; @@ -4334,6 +4362,37 @@ Resize `images` to `size` using nearest neighbor interpolation. TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; } +def TF_ResourceApplyAdamOp : TF_Op<"ResourceApplyAdam", []> { + let summary = "Update '*var' according to the Adam algorithm."; + + let description = [{ +$$lr_t := \text{learning\_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$ +$$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$ +$$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$ +$$variable := variable - lr_t * m_t / (\sqrt{v_t} + \epsilon)$$ + }]; + + let arguments = (ins + TF_ResourceTensor:$var, + TF_ResourceTensor:$m, + TF_ResourceTensor:$v, + TensorOf<[BF16, F16, F32, F64, I16, I32, I64, I8, TF_Complex128, TF_Complex64, TF_Qint32, TF_Qint8, TF_Quint8, TF_Uint16, TF_Uint32, TF_Uint64, TF_Uint8]>:$beta1_power, + TensorOf<[BF16, F16, F32, F64, I16, I32, I64, I8, TF_Complex128, TF_Complex64, TF_Qint32, TF_Qint8, TF_Quint8, TF_Uint16, TF_Uint32, TF_Uint64, TF_Uint8]>:$beta2_power, + TensorOf<[BF16, F16, F32, F64, I16, I32, I64, I8, TF_Complex128, TF_Complex64, TF_Qint32, TF_Qint8, TF_Quint8, TF_Uint16, TF_Uint32, TF_Uint64, TF_Uint8]>:$lr, + TensorOf<[BF16, F16, F32, F64, I16, I32, I64, I8, TF_Complex128, TF_Complex64, TF_Qint32, TF_Qint8, TF_Quint8, TF_Uint16, TF_Uint32, TF_Uint64, TF_Uint8]>:$beta1, + TensorOf<[BF16, F16, F32, F64, I16, I32, I64, I8, TF_Complex128, TF_Complex64, TF_Qint32, TF_Qint8, TF_Quint8, TF_Uint16, TF_Uint32, TF_Uint64, TF_Uint8]>:$beta2, + TensorOf<[BF16, F16, F32, F64, I16, I32, I64, I8, TF_Complex128, TF_Complex64, TF_Qint32, TF_Qint8, TF_Quint8, TF_Uint16, TF_Uint32, TF_Uint64, TF_Uint8]>:$epsilon, + TensorOf<[BF16, F16, F32, F64, I16, I32, I64, I8, TF_Complex128, TF_Complex64, TF_Qint32, TF_Qint8, TF_Quint8, TF_Uint16, TF_Uint32, TF_Uint64, TF_Uint8]>:$grad, + + DefaultValuedAttr:$use_locking, + DefaultValuedAttr:$use_nesterov + ); + + let results = (outs); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<3>; +} + def TF_ResourceApplyGradientDescentOp : TF_Op<"ResourceApplyGradientDescent", []> { let summary = "Update '*var' by subtracting 'alpha' * 'delta' from it."; @@ -4353,6 +4412,34 @@ def TF_ResourceApplyGradientDescentOp : TF_Op<"ResourceApplyGradientDescent", [] TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<1>; } +def TF_ResourceApplyKerasMomentumOp : TF_Op<"ResourceApplyKerasMomentum", []> { + let summary = [{ +Update '*var' according to the momentum scheme. + }]; + + let description = [{ +Set use_nesterov = True if you want to use Nesterov momentum. 
+ +accum = accum * momentum - lr * grad +var += accum + }]; + + let arguments = (ins + TF_ResourceTensor:$var, + TF_ResourceTensor:$accum, + TensorOf<[BF16, F16, F32, F64, I16, I32, I64, I8, TF_Complex128, TF_Complex64, TF_Qint32, TF_Qint8, TF_Quint8, TF_Uint16, TF_Uint32, TF_Uint64, TF_Uint8]>:$lr, + TensorOf<[BF16, F16, F32, F64, I16, I32, I64, I8, TF_Complex128, TF_Complex64, TF_Qint32, TF_Qint8, TF_Quint8, TF_Uint16, TF_Uint32, TF_Uint64, TF_Uint8]>:$grad, + TensorOf<[BF16, F16, F32, F64, I16, I32, I64, I8, TF_Complex128, TF_Complex64, TF_Qint32, TF_Qint8, TF_Quint8, TF_Uint16, TF_Uint32, TF_Uint64, TF_Uint8]>:$momentum, + + DefaultValuedAttr:$use_locking, + DefaultValuedAttr:$use_nesterov + ); + + let results = (outs); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<2>; +} + def TF_ReverseSequenceOp : TF_Op<"ReverseSequence", [NoSideEffect]> { let summary = "Reverses variable length slices."; @@ -5117,6 +5204,8 @@ def TF_SplitVOp : TF_Op<"SplitV", [NoSideEffect]> { TF_DerivedOperandTypeAttr Tlen = TF_DerivedOperandTypeAttr<1>; TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; TF_DerivedResultSizeAttr num_split = TF_DerivedResultSizeAttr<0>; + + let verifier = [{ return Verify(*this); }]; } def TF_SqrtOp : TF_Op<"Sqrt", [NoSideEffect, SameOperandsAndResultType]> { @@ -5491,6 +5580,65 @@ output. For the internal use of the distributed TPU compiler. TF_DerivedResultTypeListAttr Tresults = TF_DerivedResultTypeListAttr<0>; } +def TF_TPUReplicatedInputOp : TF_Op<"TPUReplicatedInput", [NoSideEffect]> { + let summary = "Connects N inputs to an N-way replicated TPU computation."; + + let description = [{ +This operation holds a replicated input to a `tpu.replicate()` computation subgraph. +Each replicated input has the same shape and type alongside the output. + +For example: +``` +%a = "tf.opA"() +%b = "tf.opB"() +%replicated_input = "tf.TPUReplicatedInput"(%a, %b) +%computation = "tf.Computation"(%replicated_input) +``` +The above computation has a replicated input of two replicas. + }]; + + let arguments = (ins + Variadic:$inputs, + + DefaultValuedAttr:$is_mirrored_variable, + DefaultValuedAttr:$index + ); + + let results = (outs + TF_Tensor:$output + ); + + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; + TF_DerivedOperandSizeAttr N = TF_DerivedOperandSizeAttr<0>; +} + +def TF_TPUReplicatedOutputOp : TF_Op<"TPUReplicatedOutput", [NoSideEffect]> { + let summary = "Connects N outputs from an N-way replicated TPU computation."; + + let description = [{ +This operation holds a replicated output from a `tpu.replicate()` computation subgraph. +Each replicated output has the same shape and type alongside the input. + +For example: +``` +%computation = "tf.Computation"() +%replicated_output:2 = "tf.TPUReplicatedOutput"(%computation) +``` +The above computation has a replicated output of two replicas. + }]; + + let arguments = (ins + TF_Tensor:$input + ); + + let results = (outs + Variadic:$outputs + ); + + TF_DerivedResultSizeAttr num_replicas = TF_DerivedResultSizeAttr<0>; + TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; +} + def TF_TanhOp : TF_Op<"Tanh", [NoSideEffect, SameOperandsAndResultType]> { let summary = "Computes hyperbolic tangent of `x` element-wise."; @@ -5905,6 +6053,8 @@ This is the opposite of `pack`. 
TF_DerivedResultSizeAttr num = TF_DerivedResultSizeAttr<0>; TF_DerivedOperandTypeAttr T = TF_DerivedOperandTypeAttr<0>; + + let verifier = [{ return Verify(*this); }]; } def TF_VariableShapeOp : TF_Op<"VariableShape", []> { diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc index 1bd9accbb78..9d2f634161c 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc @@ -301,6 +301,15 @@ void AddOp::getCanonicalizationPatterns(OwningRewritePatternList &results, results.insert(context); } +//===----------------------------------------------------------------------===// +// AddNOp +//===----------------------------------------------------------------------===// + +OpFoldResult AddNOp::fold(ArrayRef operands) { + if (operands.size() == 1) return *inputs().begin(); + return {}; +} + //===----------------------------------------------------------------------===// // AddV2Op //===----------------------------------------------------------------------===// @@ -310,6 +319,49 @@ void AddV2Op::getCanonicalizationPatterns(OwningRewritePatternList &results, results.insert(context); } +//===----------------------------------------------------------------------===// +// AllOp +//===----------------------------------------------------------------------===// + +// Verifies an reduction op's `input` and reduction `dims`. +static LogicalResult VerifyReductionInputAndDims(Value *input, Value *dims, + Location loc) { + auto dims_type = dims->getType().dyn_cast(); + if (!dims_type) return success(); + if (dims_type.getRank() > 1) + return emitError(loc, "dimensions can only be 0D or 1D tensor"); + + auto input_type = input->getType().dyn_cast(); + if (!input_type) return success(); + int64_t rank = input_type.getRank(); + + DenseIntElementsAttr dims_attr; + if (!matchPattern(dims, m_Constant(&dims_attr))) return success(); + for (const auto &dim_pair : llvm::enumerate(dims_attr)) { + int64_t cur_dim = dim_pair.value().getSExtValue(); + if (cur_dim < -rank || cur_dim >= rank) + return emitError(loc) + << dim_pair.index() << "-th dimension should be in the range of [-" + << rank << ", " << rank << ")"; + } + + return success(); +} + +static LogicalResult Verify(AllOp op) { + return VerifyReductionInputAndDims(op.input(), op.reduction_indices(), + op.getLoc()); +} + +//===----------------------------------------------------------------------===// +// AnyOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(AnyOp op) { + return VerifyReductionInputAndDims(op.input(), op.reduction_indices(), + op.getLoc()); +} + //===----------------------------------------------------------------------===// // AssertOp //===----------------------------------------------------------------------===// @@ -1542,17 +1594,23 @@ static LogicalResult Verify(SoftmaxCrossEntropyWithLogitsOp op) { // SplitOp //===----------------------------------------------------------------------===// -static LogicalResult Verify(SplitOp op) { +// Verifies the input and split dimension operands for tf.Split/tf.SplitV. +// Writes the split dimension's index (adjusted with input rank) via `dim_index` +// if it's a constant. 
+template +LogicalResult VerifySplitInputAndSplitDim(Op op, Optional *dim_index) { + *dim_index = llvm::None; + Value *split_dim = op.split_dim(); - auto split_dim_type = split_dim->getType().dyn_cast(); - if (!split_dim_type) return success(); - if (split_dim_type.getRank() != 0) - return op.emitOpError("split dimension should be an integer scalar tensor"); + if (auto split_dim_type = split_dim->getType().dyn_cast()) + if (split_dim_type.getRank() != 0) + return op.emitOpError( + "split dimension should be an integer scalar tensor"); // We can perform further verification if the input tensor to be split has // known rank and the split dimension tensor is a constant. - auto input_type = op.value()->getType().dyn_cast(); + auto input_type = op.value()->getType().template dyn_cast(); if (!input_type) return success(); int64_t input_rank = input_type.getRank(); @@ -1562,21 +1620,95 @@ static LogicalResult Verify(SplitOp op) { DenseIntElementsAttr split_dim_attr; if (!matchPattern(split_dim, m_Constant(&split_dim_attr))) return success(); - int64_t dim_index = (*split_dim_attr.begin()).getSExtValue(); + int64_t index = (*split_dim_attr.begin()).getSExtValue(); - if (dim_index + input_rank < 0 || dim_index >= input_rank) { + if (index + input_rank < 0 || index >= input_rank) { return op.emitOpError("split dimension must be in range [-") << input_rank << ", " << input_rank << ")"; } - if (dim_index < 0) dim_index += input_rank; + if (index < 0) index += input_rank; + *dim_index = index; - int64_t input_dim_size = input_type.getDimSize(dim_index); - if (input_dim_size < 0) return success(); + return success(); +} + +static LogicalResult Verify(SplitOp op) { + Optional dim_index; + if (failed(VerifySplitInputAndSplitDim(op, &dim_index))) return failure(); + if (!dim_index) return success(); + + int64_t input_dim_size = + op.value()->getType().cast().getDimSize(*dim_index); + if (input_dim_size == ShapedType::kDynamicSize) return success(); if (input_dim_size % op.getNumResults() != 0) return op.emitOpError("dimension #") - << dim_index << " not divisible by the number of result tensors"; + << *dim_index << " not divisible by the number of result tensors"; + + return success(); +} + +//===----------------------------------------------------------------------===// +// SplitVOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(SplitVOp op) { + auto split_sizes_type = + op.size_splits()->getType().dyn_cast(); + if (!split_sizes_type) return success(); + + if (split_sizes_type.getRank() != 1 || + split_sizes_type.getDimSize(0) != op.getNumResults()) + return op.emitOpError("split sizes should be a 1D tensor of ") + << op.getNumResults() << " elements"; + + Optional dim_index = 0; + if (failed(VerifySplitInputAndSplitDim(op, &dim_index))) return failure(); + if (!dim_index) return success(); + + int64_t input_dim_size = + op.value()->getType().cast().getDimSize(*dim_index); + if (input_dim_size == ShapedType::kDynamicSize) return success(); + + // If split sizes come from a constant, they must sum to the dimension size + // along split_dim, and we can have no more than one dynamic dimension. 
+ DenseIntElementsAttr split_sizes_attr; + if (!matchPattern(op.size_splits(), m_Constant(&split_sizes_attr))) + return success(); + + int64_t total_dim_size = 0; // Total dimension size assigned to splits + llvm::Optional dynamic_dim_index; + + SmallVector split_sizes; + split_sizes.reserve( + split_sizes_attr.getType().cast().getNumElements()); + + for (auto dim : llvm::enumerate(split_sizes_attr)) { + int64_t dim_val = dim.value().getSExtValue(); + split_sizes.push_back(dim_val); + if (dim_val == ShapedType::kDynamicSize) { + // We cannot have more than one dynamic dimension. + if (dynamic_dim_index) + return op.emitOpError( + "cannot have more than one dynamic dimension in split sizes"); + dynamic_dim_index = dim.index(); + } else { + total_dim_size += dim_val; + } + } + + if (!dynamic_dim_index && total_dim_size != input_dim_size) + return op.emitOpError( + "split sizes must sum up to the dimension size along split " + "dimension, found ") + << total_dim_size << " vs " << input_dim_size; + + if (dynamic_dim_index && total_dim_size > input_dim_size) + return op.emitOpError( + "split sizes must sum up to be less than or equal to the " + "dimension size along split dimension, found ") + << total_dim_size << " vs " << input_dim_size; return success(); } @@ -1787,6 +1919,30 @@ void TruncateDivOp::getCanonicalizationPatterns( results.insert(context); } +//===----------------------------------------------------------------------===// +// UnpackOp +//===----------------------------------------------------------------------===// + +static LogicalResult Verify(UnpackOp op) { + auto value_type = op.value()->getType().dyn_cast(); + if (!value_type) return success(); + + int64_t value_rank = value_type.getRank(); + int64_t axis = op.axis().getSExtValue(); + if (axis < -value_rank || axis >= value_rank) + return op.emitOpError("axis attribute must be in the range of [-") + << value_rank << ", " << value_rank << ')'; + + axis = GetDimForAxis(axis, value_rank); + int64_t dim_size = value_type.getDimSize(axis); + if (ShapedType::isDynamic(dim_size)) return success(); + + if (dim_size != op.getNumResults()) + return op.emitOpError("result count must be equal to ") << dim_size; + + return success(); +} + //===----------------------------------------------------------------------===// // VariableShapeOp //===----------------------------------------------------------------------===// diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td index 8d975e909bb..9b6196cda5b 100644 --- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td +++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td @@ -196,6 +196,42 @@ retained with length 1. TF_DerivedOperandTypeAttr Tidx = TF_DerivedOperandTypeAttr<1>; } +def TF_LegacyCallOp : TF_Op<"LegacyCall", + [CallOpInterface, NoSideEffect]> { + let summary = + "returns `f(inputs)`, where `f` is a function."; + + let description = [{ + The LegacyCall operation represents a direct call to a function that is + within the same symbol scope as the call and is mapped to a GraphDef node + with the function name as the op name. Unlike a PartitionedCall which + represents asynchronously executing a function across multiple devices, a + LegacyCall represents a function call with the only attribute + _diable_call_shape_inference. 
+ }]; + + let arguments = (ins + Variadic:$args, + + FlatSymbolRefAttr:$f, + DefaultValuedAttr:$_disable_call_shape_inference + ); + + let results = (outs + Variadic:$output + ); + + let extraClassDeclaration = [{ + // Gets the argument operands to the called function. + operand_range getArgOperands() { return args(); } + + // Returns the callee of this operation. + CallInterfaceCallable getCallableForCallee() { + return getAttrOfType("f"); + } + }]; +} + def TF_PartitionedCallOp : TF_Op<"PartitionedCall", [CallOpInterface, NoSideEffect]> { let summary = diff --git a/tensorflow/compiler/mlir/tensorflow/tests/breakup-islands.mlir b/tensorflow/compiler/mlir/tensorflow/tests/breakup-islands.mlir index 67c3982fe3b..d5a5c16cbff 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/breakup-islands.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/breakup-islands.mlir @@ -18,7 +18,7 @@ func @multiple_return(%arg0: tensor<*xi32>, %arg1: tensor) -> (tensor<*xi32 // CHECK-LABEL: func @multiple_return // CHECK: %[[GRAPH:.*]]:2 = tf_executor.graph { // CHECK: %[[ADD1:.*]], %[[ADD1_control:.*]] = tf_executor.island wraps "tf.Add"(%arg0, %arg1) -// CHECK: %[[ADD2:.*]], %[[ADD2_control:.*]] = tf_executor.island(%[[ADD1_control]]) wraps "tf.Add"(%[[ADD1]], %arg1) +// CHECK: %[[ADD2:.*]], %[[ADD2_control:.*]] = tf_executor.island wraps "tf.Add"(%[[ADD1]], %arg1) // CHECK: tf_executor.fetch %[[ADD1]], %[[ADD2]] : // CHECK: } // CHECK: return %[[GRAPH]]#0, %[[GRAPH]]#1 @@ -41,7 +41,12 @@ func @multiple_islands(%arg0: tensor<*xi32>, %arg1: tensor) -> (tensor<*xi3 %res = "tf.Print"(%sub) { message = "sub result" } : (tensor<*xi32>) -> (tensor<*xi32>) tf_executor.yield } - tf_executor.fetch %island1#1, %island2#1, %island3 : tensor<*xi32>, tensor<*xi32>, !tf_executor.control + %island4 = tf_executor.island(%island1#2, %island2#2) { + %add = "tf.Add"(%island1#1, %island1#1) : (tensor<*xi32>, tensor<*xi32>) -> tensor<*xi32> + %res = "tf.Print"(%add) { message = "add result" } : (tensor<*xi32>) -> (tensor<*xi32>) + tf_executor.yield + } + tf_executor.fetch %island1#1, %island2#1, %island3, %island4 : tensor<*xi32>, tensor<*xi32>, !tf_executor.control, !tf_executor.control } return %graph#0, %graph#1 : tensor<*xi32>, tensor<*xi32> } @@ -49,12 +54,17 @@ func @multiple_islands(%arg0: tensor<*xi32>, %arg1: tensor) -> (tensor<*xi3 // CHECK-LABEL: func @multiple_islands // CHECK: %[[GRAPH:.*]]:2 = tf_executor.graph { // CHECK: %[[ADD1:.*]], %[[ADD1_control:.*]] = tf_executor.island wraps "tf.Add"(%arg0, %arg1) -// CHECK: %[[ADD2:.*]], %[[ADD2_control:.*]] = tf_executor.island(%[[ADD1_control]]) wraps "tf.Add"(%[[ADD1]], %arg1) +// CHECK: %[[ADD2:.*]], %[[ADD2_control:.*]] = tf_executor.island wraps "tf.Add"(%[[ADD1]], %arg1) // CHECK: %[[SUB1:.*]], %[[SUB1_control:.*]] = tf_executor.island(%[[ADD2_control]]) wraps "tf.Sub"(%arg0, %arg1) -// CHECK: %[[MUL:.*]], %[[MUL_control:.*]] = tf_executor.island(%[[SUB1_control]]) wraps "tf.Mul"(%[[SUB1]], %arg1) +// CHECK: %[[MUL:.*]], %[[MUL_control:.*]] = tf_executor.island wraps "tf.Mul"(%[[SUB1]], %arg1) // CHECK: %[[SUB2:.*]], %[[SUB2_control:.*]] = tf_executor.island(%[[ADD2_control]], %[[MUL_control]]) wraps "tf.Sub"(%[[ADD1]], %[[SUB1]]) -// CHECK: %[[PRINT:.*]], %[[PRINT_control:.*]] = tf_executor.island(%[[SUB2_control]]) wraps "tf.Print"(%[[SUB2]]) {message = "sub result"} -// CHECK: tf_executor.fetch %[[ADD2]], %[[MUL]], %[[PRINT_control]] : +// CHECK: %[[PRINT1:.*]], %[[PRINT1_control:.*]] = tf_executor.island wraps "tf.Print"(%[[SUB2]]) {message = "sub result"} 
+// CHECK: %[[ISLAND1:.*]] = tf_executor.island(%[[ADD2_control]], %[[MUL_control]]) { +// CHECK: tf_executor.yield +// CHECK: } +// CHECK: %[[ADD3:.*]], %[[ADD3_control:.*]] = tf_executor.island(%[[ISLAND1]], %[[ADD2_control]]) wraps "tf.Add"(%[[ADD2]], %[[ADD2]]) +// CHECK: %[[PRINT2:.*]], %[[PRINT2_control:.*]] = tf_executor.island wraps "tf.Print"(%[[ADD3]]) {message = "add result"} +// CHECK: tf_executor.fetch %[[ADD2]], %[[MUL]], %[[PRINT1_control]], %[[PRINT2_control:.*]] : // CHECK: } // CHECK: return %[[GRAPH]]#0, %[[GRAPH]]#1 @@ -74,8 +84,8 @@ func @dangling_print(%arg0: tensor<*xi32>, %arg1: tensor) -> (tensor<*xi32> // CHECK-LABEL: func @dangling_print // CHECK: %[[GRAPH:.*]]:2 = tf_executor.graph { // CHECK: %[[ADD1:.*]], %[[ADD1_control:.*]] = tf_executor.island wraps "tf.Add"(%arg0, %arg1) -// CHECK: %[[ADD2:.*]], %[[ADD2_control:.*]] = tf_executor.island(%[[ADD1_control]]) wraps "tf.Add"(%[[ADD1_control:.*]], %arg1) -// CHECK: %[[PRINT:.*]], %[[PRINT_control:.*]] = tf_executor.island(%[[ADD2_control]]) wraps "tf.Print"(%[[ADD2_control:.*]]) {message = "add result"} +// CHECK: %[[ADD2:.*]], %[[ADD2_control:.*]] = tf_executor.island wraps "tf.Add"(%[[ADD1_control:.*]], %arg1) +// CHECK: %[[PRINT:.*]], %[[PRINT_control:.*]] = tf_executor.island wraps "tf.Print"(%[[ADD2_control:.*]]) {message = "add result"} // CHECK: tf_executor.fetch %[[ADD1]], %[[ADD2]], %[[PRINT_control]] : // CHECK: } // CHECK: return %[[GRAPH]]#0, %[[GRAPH]]#1 @@ -103,11 +113,14 @@ func @switch_and_merge(%arg0: tensor<*xi32>, %arg1: tensor) -> (tensor<*xi3 // CHECK-LABEL: func @switch_and_merge(%arg0: tensor<*xi32>, %arg1: tensor) -> (tensor<*xi32>, tensor) { // CHECK: %[[GRAPH:.*]]:2 = tf_executor.graph { // CHECK: %[[ADD1:.*]], %[[ADD1_control:.*]] = tf_executor.island wraps "tf.Add"(%arg0, %arg1) -// CHECK: %[[LESS:.*]], %[[LESS_control:.*]] = tf_executor.island(%[[ADD1_control]]) wraps "tf.Less"(%arg1, %arg1) -// CHECK: %[[PRINT1:.*]], %[[PRINT1_control:.*]] = tf_executor.island(%[[LESS_control]]) wraps "tf.Print"(%[[ADD1]]) {message = "add result 1"} -// CHECK: %[[SWITCH_false:.*]], %[[SWITCH_true:.*]], {{.*}} = tf_executor.Switch %[[ADD1]], %[[LESS]], %[[PRINT1_control]] +// CHECK: %[[LESS:.*]], %[[LESS_control:.*]] = tf_executor.island wraps "tf.Less"(%arg1, %arg1) +// CHECK: %[[PRINT1:.*]], %[[PRINT1_control:.*]] = tf_executor.island wraps "tf.Print"(%[[ADD1]]) {message = "add result 1"} +// CHECK: %[[ISLAND1:.*]] = tf_executor.island(%[[LESS_control]], %[[PRINT1_control]]) { +// CHECK: tf_executor.yield +// CHECK: } +// CHECK: %[[SWITCH_false:.*]], %[[SWITCH_true:.*]], {{.*}} = tf_executor.Switch %[[ADD1]], %[[LESS]], %[[ISLAND1]] // CHECK: %[[ADD2:.*]], %[[ADD2_control:.*]] = tf_executor.island wraps "tf.Add"(%[[SWITCH_false]], %arg1) -// CHECK: %[[PRINT2:.*]], %[[PRINT2_control:.*]] = tf_executor.island(%[[ADD2_control]]) wraps "tf.Print"(%[[ADD2]]) {message = "add result 2"} +// CHECK: %[[PRINT2:.*]], %[[PRINT2_control:.*]] = tf_executor.island wraps "tf.Print"(%[[ADD2]]) {message = "add result 2"} // CHECK: %[[MERGE:.*]], %[[MERGE_index:.*]], %{{.*}} = tf_executor.Merge %[[ADD2]], %[[SWITCH_true]], %[[PRINT2_control]] // CHECK: tf_executor.fetch %[[MERGE]], %[[MERGE_index]] // CHECK: } @@ -130,7 +143,7 @@ func @control_flow_plumbing(%arg0: tensor<*xi32>, %arg1: tensor) -> tensor< // CHECK: %[[GRAPH:.*]] = tf_executor.graph { // CHECK: %[[PRINT:.*]], %[[PRINT_control:.*]] = tf_executor.island wraps "tf.Print"(%arg0) {message = "Random Print"} // CHECK: %[[ADD1:.*]], %[[ADD1_control:.*]] = 
tf_executor.island(%[[PRINT_control]]) wraps "tf.Add"(%arg0, %arg1) -// CHECK: %[[ADD2:.*]], %[[ADD2_control:.*]] = tf_executor.island(%[[ADD1_control]]) wraps "tf.Add"(%[[ADD1]], %arg1) +// CHECK: %[[ADD2:.*]], %[[ADD2_control:.*]] = tf_executor.island wraps "tf.Add"(%[[ADD1]], %arg1) // CHECK: tf_executor.fetch %[[ADD2]] : tensor<*xi32> // CHECK: } // CHECK: return %[[GRAPH]] : tensor<*xi32> @@ -150,6 +163,77 @@ func @fetching_arg(%arg0: tensor<*xi32>) { // CHECK-LABEL: func @fetching_arg // CHECK: tf_executor.graph { // CHECK: %[[ADD1:.*]], %[[ADD1_control:.*]] = tf_executor.island wraps "tf.Add"(%arg0, %arg0) -// CHECK: %[[ADD2:.*]], %[[ADD2_control:.*]] = tf_executor.island(%[[ADD1_control]]) wraps "tf.Add"(%[[ADD1]], %arg0) +// CHECK: %[[ADD2:.*]], %[[ADD2_control:.*]] = tf_executor.island wraps "tf.Add"(%[[ADD1]], %arg0) // CHECK: tf_executor.fetch %[[ADD2_control]] : !tf_executor.control // CHECK: } + +func @non_aliasing_reads_writes( + %arg0: tensor<*x!tf.resource>>, + %arg1: tensor<*x!tf.resource>>, + %arg2: tensor<32xf32>) -> (tensor<32xf32>) { + %graph = tf_executor.graph { + %island:2 = tf_executor.island { + %read0 = "tf.ReadVariableOp"(%arg0) : (tensor<*x!tf.resource>>) -> tensor<32xf32> + "tf.AssignVariableOp"(%arg0, %arg2) : (tensor<*x!tf.resource>>, tensor<32xf32>) -> () + %read1 = "tf.ReadVariableOp"(%arg1) : (tensor<*x!tf.resource>>) -> tensor<32xf32> + %var_handle = "tf.VarHandleOp"() {container = "c", shared_name = "v0"} : () -> tensor<*x!tf.resource>> + %read2 = "tf.ReadVariableOp"(%var_handle) : (tensor<*x!tf.resource>>) -> tensor<32xf32> + "tf.AssignVariableOp"(%arg1, %read0) : (tensor<*x!tf.resource>>, tensor<32xf32>) -> () + "tf.AssignVariableOp"(%arg0, %read2) : (tensor<*x!tf.resource>>, tensor<32xf32>) -> () + %read3 = "tf.ReadVariableOp"(%arg0) : (tensor<*x!tf.resource>>) -> tensor<32xf32> + tf_executor.yield %read3 : tensor<32xf32> + } + tf_executor.fetch %island#0 : tensor<32xf32> + } + return %graph : tensor<32xf32> +} + +// CHECK-LABEL: func @non_aliasing_reads_writes +// CHECK: %[[GRAPH:.*]] = tf_executor.graph { +// CHECK: %[[READ0:.*]], %[[READ0_CONTROL:.*]] = tf_executor.island wraps "tf.ReadVariableOp"(%arg0) +// CHECK: %[[ASSIGN0_CONTROL:.*]] = tf_executor.island(%[[READ0_CONTROL]]) wraps "tf.AssignVariableOp"(%arg0, %arg2) +// CHECK: %[[READ1:.*]], %[[READ1_CONTROL:.*]] = tf_executor.island wraps "tf.ReadVariableOp"(%arg1) +// CHECK: %[[VH0:.*]], %[[VH0_CONTROL:.*]] = tf_executor.island wraps "tf.VarHandleOp"() {container = "c", shared_name = "v0"} +// CHECK: %[[READ2:.*]], %[[READ2_CONTROL:.*]] = tf_executor.island wraps "tf.ReadVariableOp"(%[[VH0]]) +// CHECK: %[[ASSIGN1_CONTROL:.*]] = tf_executor.island(%[[READ1_CONTROL]]) wraps "tf.AssignVariableOp"(%arg1, %[[READ0:.*]]) +// CHECK: %[[ASSIGN2_CONTROL:.*]] = tf_executor.island(%[[ASSIGN0_CONTROL]]) wraps "tf.AssignVariableOp"(%arg0, %[[READ2]]) +// CHECK: %[[READ3:.*]], %[[READ3_CONTROL:.*]] = tf_executor.island(%[[ASSIGN2_CONTROL]]) wraps "tf.ReadVariableOp"(%arg0) +// CHECK: %[[ISLAND1:.*]] = tf_executor.island(%[[ASSIGN1_CONTROL]], %[[READ3_CONTROL]]) { +// CHECK: tf_executor.yield +// CHECK: } +// CHECK: tf_executor.fetch %[[READ3]], %[[ISLAND1]] : tensor<32xf32>, !tf_executor.control +// CHECK: } + +func @unknown_side_effecting_op(%arg0: tensor<32xf32>) -> () { + tf_executor.graph { + %island = tf_executor.island { + %vh0 = "tf.VarHandleOp"() {container = "c", shared_name = "v0"} : () -> tensor<*x!tf.resource>> + %vh1 = "tf.VarHandleOp"() {container = "c", shared_name = "v1"} : () -> 
tensor<*x!tf.resource>> + %read0 = "tf.ReadVariableOp"(%vh0) : (tensor<*x!tf.resource>>) -> tensor<32xf32> + "tf.AssignVariableOp"(%vh1, %arg0) : (tensor<*x!tf.resource>>, tensor<32xf32>) -> () + "tf._UnknownSideEffectingOp_"() : () -> () + %read1 = "tf.ReadVariableOp"(%vh1) : (tensor<*x!tf.resource>>) -> tensor<32xf32> + "tf.AssignVariableOp"(%vh0, %read1) : (tensor<*x!tf.resource>>, tensor<32xf32>) -> () + "tf.AssignVariableOp"(%vh1, %read0) : (tensor<*x!tf.resource>>, tensor<32xf32>) -> () + tf_executor.yield + } + tf_executor.fetch %island : !tf_executor.control + } + return +} + +// CHECK-LABEL: func @unknown_side_effecting_op +// CHECK: tf_executor.graph { +// CHECK: %[[VH0:.*]], %[[VH0_CONTROL:.*]] = tf_executor.island wraps "tf.VarHandleOp"() {container = "c", shared_name = "v0"} +// CHECK: %[[VH1:.*]], %[[VH1_CONTROL:.*]] = tf_executor.island wraps "tf.VarHandleOp"() {container = "c", shared_name = "v1"} +// CHECK: %[[READ0:.*]], %[[READ0_CONTROL:.*]] = tf_executor.island wraps "tf.ReadVariableOp"(%[[VH0]]) +// CHECK: %[[ASSIGN0_CONTROL:.*]] = tf_executor.island wraps "tf.AssignVariableOp"(%[[VH1]], %arg0) +// CHECK: %[[UNKNOWN_CONTROL:.*]] = tf_executor.island(%[[READ0_CONTROL]], %[[ASSIGN0_CONTROL]]) wraps "tf._UnknownSideEffectingOp_"() +// CHECK: %[[READ1:.*]], %[[READ1_CONTROL:.*]] = tf_executor.island(%[[UNKNOWN_CONTROL]]) wraps "tf.ReadVariableOp"(%[[VH1]]) +// CHECK: %[[ASSIGN1_CONTROL:.*]] = tf_executor.island(%[[UNKNOWN_CONTROL]]) wraps "tf.AssignVariableOp"(%[[VH0]], %[[READ1]]) +// CHECK: %[[ASSIGN2_CONTROL:.*]] = tf_executor.island(%[[READ1_CONTROL]]) wraps "tf.AssignVariableOp"(%[[VH1]], %[[READ0]]) +// CHECK: %[[ISLAND1:.*]] = tf_executor.island(%[[ASSIGN1_CONTROL]], %[[ASSIGN2_CONTROL]]) { +// CHECK: tf_executor.yield +// CHECK: } +// CHECK: tf_executor.fetch %[[ISLAND1]] : !tf_executor.control +// CHECK: } diff --git a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir index a2cc33a8201..18c63912a86 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/canonicalize.mlir @@ -382,3 +382,10 @@ func @nonIdentityTranspose(%arg0: tensor<2x3x4x5x6xf32>) -> tensor<2x3x4x6x5xf32 // CHECK: %1 = "tf.Transpose"(%arg0, %0) : (tensor<2x3x4x5x6xf32>, tensor<5xi32>) -> tensor<2x3x4x6x5xf32> // CHECK: return %1 } + +// CHECK-LABEL: func @addN +func @addN(%arg0: tensor<*xf32>) -> tensor<*xf32> { + // CHECK: return %arg0 + %0 = "tf.AddN"(%arg0) : (tensor<*xf32>) -> tensor<*xf32> + return %0 : tensor<*xf32> +} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/decompose_resource_ops.mlir b/tensorflow/compiler/mlir/tensorflow/tests/decompose_resource_ops.mlir new file mode 100644 index 00000000000..67d58b41199 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/decompose_resource_ops.mlir @@ -0,0 +1,67 @@ +// RUN: tf-opt %s -split-input-file -tf-device-decompose-resource-ops | FileCheck %s + +// ----- + +// Tests that composite tf.AssignAddVariableOp operation is decomposed and +// hoisted. 
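+// As a rough sketch (assuming the resource holds a scalar i32; the value
+// names are illustrative), the expected rewrite turns
+//   "tf.AssignAddVariableOp"(%resource, %one)
+// into a read-modify-write sequence of plain ops:
+//   %old = "tf.ReadVariableOp"(%resource) : (tensor<*x!tf.resource>) -> tensor<i32>
+//   %new = "tf.AddV2"(%old, %one) : (tensor<i32>, tensor<i32>) -> tensor<i32>
+//   "tf.AssignVariableOp"(%resource, %new) : (tensor<*x!tf.resource>, tensor<i32>) -> ()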
+ +// CHECK-LABEL: func @decompose_assign_add_variable_op +func @decompose_assign_add_variable_op() -> () { + + %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource> + + // CHECK: %[[ONE:[0-9]*]] = "tf.Const"() {value = dense<1> : tensor} + // CHECK: %[[RES_READ_VAL:[0-9]*]] = "tf.ReadVariableOp" + // CHECK: "tf.AddV2"(%[[RES_READ_VAL]], %[[ONE]]) + // CHECK: "tf.AssignVariableOp" + + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + "tf.AssignAddVariableOp"(%0, %1) {dtype = "tfdtype$DT_INT32"} : (tensor<*x!tf.resource>, tensor) -> () + + return +} + +// ----- + +// Tests that composite tf.AssignSubVariableOp operation is decomposed using +// SubOp. + +// CHECK-LABEL: func @decompose_assign_sub_variable_op +func @decompose_assign_sub_variable_op() -> () { + + %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource> + + // CHECK: %[[ONE:[0-9]*]] = "tf.Const"() {value = dense<1> : tensor} + // CHECK: %[[RES_READ_VAL:[0-9]*]] = "tf.ReadVariableOp" + // CHECK: "tf.Sub"(%[[RES_READ_VAL]], %[[ONE]]) + // CHECK: "tf.AssignVariableOp" + + %1 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + "tf.AssignSubVariableOp"(%0, %1) {dtype = "tfdtype$DT_INT32"} : (tensor<*x!tf.resource>, tensor) -> () + + return +} + +// ----- + +// Tests that composite tf.ResourceApplyGradientDescent operation is decomposed. + +// CHECK-LABEL: func @decompose_resource_apply_gradient_descent +// CHECK-SAME: (%[[DELTA:.*]]: tensor) +func @decompose_resource_apply_gradient_descent(%arg0: tensor) -> () { + + %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource> + + // CHECK: %[[ALPHA:[0-9]*]] = "tf.Const" + // CHECK: %[[RES_HANDLE:[0-9]*]] = "tf.VarHandleOp" + // CHECK: %[[MUL:[0-9]*]] = "tf.Mul"(%[[DELTA]], %[[ALPHA]]) + // CHECK: %[[RES_READ_VAL:[0-9]*]] = "tf.ReadVariableOp"(%[[RES_HANDLE]]) + // CHECK: %[[SUB:[0-9]*]] = "tf.Sub"(%[[RES_READ_VAL]], %[[MUL]]) + // CHECK: "tf.AssignVariableOp"(%[[RES_HANDLE]], %[[SUB]]) + + %1 = "tf.Const"() {T = f32, value = dense<[0.5]> : tensor<1xf32>} : () -> tensor + "tf.ResourceApplyGradientDescent"(%0, %1, %arg0) {use_locking = false} : (tensor<*x!tf.resource>, tensor, tensor) -> () + + return +} + diff --git a/tensorflow/compiler/mlir/tensorflow/tests/functional-control-flow-to-cfg.mlir b/tensorflow/compiler/mlir/tensorflow/tests/functional-control-flow-to-cfg.mlir index 2a0434b69e0..a0390ec8738 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/functional-control-flow-to-cfg.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/functional-control-flow-to-cfg.mlir @@ -49,40 +49,33 @@ func @testIf3Result(tensor, tensor<*xf32>) -> (tensor<*xf32>, tensor<*xi8>, // ----- -func @testIf1Then(tensor<2x?xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> -func @testIf1Else(tensor<*xf32>, tensor<2x?xf32>) -> tensor<*xf32> +func @testIfThen(%arg0: tensor) -> tensor { + return %arg0 : tensor +} +func @testIfElse(%arg0: tensor) -> tensor { + return %arg0 : tensor +} -// CHECK-LABEL: func @testIf1Casts(%arg0: tensor, %arg1: tensor<2x2xf32>, %arg2: tensor<*xf32>) -func @testIf1Casts(tensor, tensor<2x2xf32>, tensor<*xf32>) -> tensor<2x?xf32> { -^bb0(%arg0: tensor, %arg1: tensor<2x2xf32>, %arg2: tensor<*xf32>): - - %1 = "tf.If"(%arg0, %arg1, %arg2) { - then_branch = @testIf1Then, else_branch = @testIf1Else, is_stateless = false - } : (tensor, tensor<2x2xf32>, tensor<*xf32>) -> tensor<2x?xf32> - -// CHECK: %0 = extract_element %arg0[] : tensor -// CHECK: cond_br %0, ^bb1, 
^bb2 -// CHECK:^bb1: // pred: ^bb0 -// CHECK: %1 = tensor_cast %arg1 : tensor<2x2xf32> to tensor<2x?xf32> -// CHECK: %2 = tensor_cast %arg2 : tensor<*xf32> to tensor<2x2xf32> -// CHECK: %3 = call @testIf1Then(%1, %2) : (tensor<2x?xf32>, tensor<2x2xf32>) -> tensor<2x2xf32> -// CHECK: %4 = tensor_cast %3 : tensor<2x2xf32> to tensor<2x?xf32> -// CHECK: br ^bb3(%4 : tensor<2x?xf32>) - -// CHECK:^bb2: // pred: ^bb0 -// CHECK: %5 = tensor_cast %arg1 : tensor<2x2xf32> to tensor<*xf32> -// CHECK: %6 = tensor_cast %arg2 : tensor<*xf32> to tensor<2x?xf32> -// CHECK: %7 = call @testIf1Else(%5, %6) : (tensor<*xf32>, tensor<2x?xf32>) -> tensor<*xf32> -// CHECK: %8 = tensor_cast %7 : tensor<*xf32> to tensor<2x?xf32> -// CHECK: br ^bb3(%8 : tensor<2x?xf32>) - -// CHECK:^bb3(%9: tensor<2x?xf32>): // 2 preds: ^bb1, ^bb2 - - %2 = "tf.Add"(%1, %1) : (tensor<2x?xf32>, tensor<2x?xf32>) -> tensor<2x?xf32> -// CHECK: %10 = "tf.Add"(%9, %9) : (tensor<2x?xf32>, tensor<2x?xf32>) -> tensor<2x?xf32> - - return %2 : tensor<2x?xf32> -// CHECK: return %10 : tensor<2x?xf32> +// CHECK-LABEL: func @testIfCasts(%arg0: tensor, %arg1: tensor>>) -> tensor>> +func @testIfCasts(%arg0: tensor, %arg1: tensor>>) -> tensor>> { + %0 = "tf.If"(%arg0, %arg1) { + then_branch = @testIfThen, else_branch = @testIfElse, is_stateless = false + } : (tensor, tensor>>) -> tensor>> + return %0: tensor>> +// CHECK: %0 = extract_element %arg0[] : tensor +// CHECK: cond_br %0, ^bb1, ^bb2 +// CHECK: ^bb1: +// CHECK: %1 = "tf.Cast"(%arg1) {Truncate = false} : (tensor>>) -> tensor +// CHECK: %2 = call @testIfThen(%1) : (tensor) -> tensor +// CHECK: %3 = "tf.Cast"(%2) {Truncate = false} : (tensor) -> tensor>> +// CHECK: br ^bb3(%3 : tensor>>) +// CHECK: ^bb2: +// CHECK: %4 = "tf.Cast"(%arg1) {Truncate = false} : (tensor>>) -> tensor +// CHECK: %5 = call @testIfElse(%4) : (tensor) -> tensor +// CHECK: %6 = "tf.Cast"(%5) {Truncate = false} : (tensor) -> tensor>> +// CHECK: br ^bb3(%6 : tensor>>) +// CHECK: ^bb3(%7: tensor>>): +// CHECK: return %7 : tensor>> } // ----- @@ -188,31 +181,36 @@ func @testComplexWhile1Result(tensor<*xf32>) -> (tensor<*xf32>) { // ----- -func @testWhileCond(tensor) -> (tensor) -func @testWhileBody(tensor<*xf32>) -> (tensor) +func @testWhileCond(%arg0: tensor) -> (tensor) { + %true = "tf.Const"() { value = dense : tensor } : () -> (tensor) + return %true : tensor +} +func @testWhileBody(%arg0: tensor>>) -> (tensor>>) { + %0 = "tf.Cast"(%arg0) : (tensor>>) -> tensor>> + return %0 : tensor>> +} -// CHECK-LABEL: func @testWhileCasts(%arg0: tensor<1x3xf32>) -func @testWhileCasts(%arg0: tensor<1x3xf32>) -> (tensor) { +// CHECK-LABEL: func @testWhileCasts(%arg0: tensor>>) -> tensor>> +func @testWhileCasts(%arg0: tensor>>) -> (tensor>>) { %0 = "tf.While"(%arg0) { cond = @testWhileCond, body = @testWhileBody, is_stateless = false - } : (tensor<1x3xf32>) -> (tensor) - -// CHECK: %0 = tensor_cast %arg0 : tensor<1x3xf32> to tensor -// CHECK: br ^bb1(%0 : tensor) -// CHECK: ^bb1(%1: tensor): -// CHECK: %2 = call @testWhileCond(%1) : (tensor) -> tensor + } : (tensor>>) -> (tensor>>) + return %0 : tensor>> +// CHECK: %0 = "tf.Cast"(%arg0) {Truncate = false} : (tensor>>) -> tensor +// CHECK: br ^bb1(%0 : tensor) +// CHECK: ^bb1(%1: tensor): // 2 preds: ^bb0, ^bb2 +// CHECK: %2 = call @testWhileCond(%1) : (tensor) -> tensor // CHECK: %3 = extract_element %2[] : tensor -// CHECK: %4 = tensor_cast %1 : tensor to tensor<*xf32> -// CHECK: cond_br %3, ^bb2(%4 : tensor<*xf32>), ^bb3(%4 : tensor<*xf32>) -// CHECK: ^bb2(%5: tensor<*xf32>): -// CHECK: 
%6 = call @testWhileBody(%5) : (tensor<*xf32>) -> tensor -// CHECK: %7 = tensor_cast %6 : tensor to tensor -// CHECK: br ^bb1(%7 : tensor) -// CHECK: ^bb3(%8: tensor<*xf32>): -// CHECK: %9 = tensor_cast %8 : tensor<*xf32> to tensor +// CHECK: %4 = "tf.Cast"(%1) {Truncate = false} : (tensor) -> tensor>> +// CHECK: cond_br %3, ^bb2(%4 : tensor>>), ^bb3(%4 : tensor>>) +// CHECK: ^bb2(%5: tensor>>): // pred: ^bb1 +// CHECK: %6 = call @testWhileBody(%5) : (tensor>>) -> tensor>> +// CHECK: %7 = "tf.Cast"(%6) {Truncate = false} : (tensor>>) -> tensor +// CHECK: br ^bb1(%7 : tensor) +// CHECK: ^bb3(%8: tensor>>): // pred: ^bb1 +// CHECK: %9 = "tf.Cast"(%8) {Truncate = false} : (tensor>>) -> tensor>> +// CHECK: return %9 : tensor>> - return %0 : tensor -// CHECK: return %9 : tensor } // ----- diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-custom-operation.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-custom-operation.pbtxt index 9ce15315832..207d6676f61 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-custom-operation.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-custom-operation.pbtxt @@ -54,5 +54,5 @@ versions { # the names are matching between the function definition and the uses / call # site (a numerical suffix may be appended). -# CHECK: "tf.foo0"( +# CHECK: "tf.LegacyCall"(%outputs) {_disable_call_shape_inference = false, f = @foo0} # CHECK: func @foo0 diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-default-attr.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-default-attr.pbtxt index b26d7e7f2ba..ac248041994 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-default-attr.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-default-attr.pbtxt @@ -8,7 +8,7 @@ # Verify that we can also pull some attributes that are needed to be able to # create a Graph in memory, like `T`. 
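+# Note: dtype-valued attributes are imported as MLIR type attributes, so the
+# MaxPool's T attribute is expected to appear as T = f32 rather than the
+# legacy string form T = "tfdtype$DT_FLOAT".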
# CHECK: tf.MaxPool -# CHECK-SAME: T = "tfdtype$DT_FLOAT" +# CHECK-SAME: T = f32 node { name: "input" diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-call.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-call.pbtxt new file mode 100644 index 00000000000..f0a7a574ae3 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-call.pbtxt @@ -0,0 +1,65 @@ +# RUN: tf-mlir-translate -graphdef-to-mlir %s -tf-input-arrays=x -tf-input-data-types=DT_INT32 -tf-input-shapes=10 -tf-output-arrays=func_call -o - | FileCheck %s + +node { + name: "x" + op: "Const" + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "func_call" + op: "test_func_name" + input: "x" + attr { + key: "_disable_call_shape_inference" + value { + b: true + } + } +} +library { + function { + signature { + name: "test_func_name" + input_arg { + name: "a_0" + type: DT_INT32 + } + output_arg { + name: "a" + type: DT_INT32 + } + } + ret { + key: "a" + value: "a_0" + } + attr { + key: "_disable_call_shape_inference" + value { + b: true + } + } + } +} + +# CHECK: func @main +# CHECK: "tf.LegacyCall"(%arg0) {_disable_call_shape_inference = true, f = @test_func_name0} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-name-bug.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-name-bug.pbtxt index dcdbe67ccb6..563007f4305 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-name-bug.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-function-name-bug.pbtxt @@ -121,8 +121,8 @@ versions { # Verify that functions from the library are properly imported. # CHECK-LABEL: func @main() { -# CHECK: "tf.foo110"() -# CHECK: "tf.foo111"() +# CHECK: "tf.LegacyCall"() {_disable_call_shape_inference = false, f = @foo110} +# CHECK: "tf.LegacyCall"() {_disable_call_shape_inference = false, f = @foo111} # CHECK-LABEL: func @foo110() { # CHECK-LABEL: func @foo111() { diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-library.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-library.pbtxt index 17b2655aa5d..b65984227f6 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-library.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/graph-library.pbtxt @@ -39,10 +39,10 @@ versions { # Verify that functions from the library are properly imported. 
# CHECK-LABEL: func @main() { -# CHECK: "tf.foo0"() -# CHECK: "tf.bar0"() +# CHECK: "tf.LegacyCall"() {_disable_call_shape_inference = false, f = @foo0} +# CHECK: "tf.LegacyCall"() {_disable_call_shape_inference = false, f = @bar0} # CHECK-LABEL: func @foo0() { -# CHECK: "tf.bar0"() +# CHECK: "tf.LegacyCall"() {_disable_call_shape_inference = false, f = @bar0} # CHECK-LABEL: func @bar0() { diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/multi-output-feeds.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/multi-output-feeds.pbtxt new file mode 100644 index 00000000000..b28e2818730 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/multi-output-feeds.pbtxt @@ -0,0 +1,300 @@ +# RUN: tf-mlir-translate -graphdef-to-mlir %s -tf-input-arrays=z:1,z:2 -tf-input-shapes=':' -tf-output-arrays=z:2,z:1,a:0 -o - | FileCheck %s --dump-input=fail +# RUN: tf-mlir-translate -graphdef-to-mlir %s -tf-prune-unused-nodes -tf-input-arrays=z:1,z:2 -tf-input-shapes=':' -tf-output-arrays=z:2,z:1,a:0 -o - | FileCheck --check-prefix=PRUNE %s --dump-input=fail +# RUN: tf-mlir-translate -graphdef-to-mlir %s -tf-prune-unused-nodes -tf-input-arrays=z:1,z:2 -tf-input-shapes=':' -tf-output-arrays=z:0,a:0 -o - | FileCheck --check-prefix=PRESERVE %s --dump-input=fail + +# Generated in Python via +# ``` +# import tensorflow as tf +# +# with tf.compat.v1.Graph().as_default() as g: +# w = tf.constant(2.0) +# x = tf.constant(3.0) +# y = tf.constant(4.0) +# var = tf.Variable(2.0) +# var_add = var.assign_add(3.0) +# with g.control_dependencies([var_add]): +# z0, z1, z2 = tf.identity_n((w, x, y)) +# +# a = tf.add(z1, z2) +# ``` + +node { + name: "w" + op: "Const" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "x" + op: "Const" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "y" + op: "Const" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 4.0 + } + } + } +} +node { + name: "var/initial_value" + op: "Const" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "var" + op: "VariableV2" + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "var/Assign" + op: "Assign" + input: "var" + input: "var/initial_value" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@var" + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "var/read" + op: "Identity" + input: "var" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@var" + } + } + } +} +node { + name: "var_add/value" + op: "Const" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + 
name: "var_add" + op: "AssignAdd" + input: "var" + input: "var_add/value" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@var" + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } +} +node { + name: "z" + op: "IdentityN" + input: "w" + input: "x" + input: "y" + input: "^var_add" + attr { + key: "T" + value { + list { + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + } + } + } +} +node { + name: "a" + op: "Add" + input: "z:1" + input: "z:2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } +} +versions { + producer: 230 +} + +# Test non zero index output tensors as feeds. Original ops where their outputs +# are replaced with feeds are preserved and args and rets are lifted to the +# function. Rets that happen to coincide with a feed should have its value be +# of the feed. +# +# CHECK: func @main(%[[ARG_0:.*]]: tensor, %[[ARG_1:.*]]: tensor) -> (tensor, tensor, tensor) +# CHECK: attributes {tf.entry_function = {inputs = "z:1,z:2", outputs = "z:2,z:1,a:0"}} +# CHECK: %{{.*}}, %[[ASSIGN_ADD_CTRL:.*]] = tf_executor.island wraps "tf.AssignAdd" +# CHECK: %{{.*}}, %{{.*}} = tf_executor.island(%[[ASSIGN_ADD_CTRL]]) wraps "tf.IdentityN" +# CHECK: %[[ADD:.*]], %{{.*}} = tf_executor.island wraps "tf.Add"(%[[ARG_0]], %[[ARG_1]]) +# CHECK: tf_executor.fetch %[[ARG_1]], %[[ARG_0]], %[[ADD]] + +# Test when non zero index output tensors are feeds, remaining ops that are +# unreachable are pruned if pruning is enabled. +# +# PRUNE: func @main(%[[ARG_0:.*]]: tensor, %[[ARG_1:.*]]: tensor) -> (tensor, tensor, tensor) +# PRUNE: attributes {tf.entry_function = {inputs = "z:1,z:2", outputs = "z:2,z:1,a:0"}} +# PRUNE-NOT: "tf.Const" +# PRUNE-NOT: "tf.VariableV2" +# PRUNE-NOT: "tf.Assign" +# PRUNE-NOT: "tf.Identity" +# PRUNE-NOT: "tf.AssignAdd" +# PRUNE-NOT: "tf.IdentityN" +# PRUNE: %[[ADD:.*]], %{{.*}} = tf_executor.island wraps "tf.Add"(%[[ARG_0]], %[[ARG_1]]) +# PRUNE: tf_executor.fetch %[[ARG_1]], %[[ARG_0]], %[[ADD]] + +# Test when non zero index output tensors are feeds, remaining ops that are +# unreachable are preserved if pruning is not enabled. 
+# +# PRESERVE: func @main(%[[ARG_0:.*]]: tensor, %[[ARG_1:.*]]: tensor) -> (tensor, tensor) +# PRESERVE: attributes {tf.entry_function = {inputs = "z:1,z:2", outputs = "z:0,a:0"}} +# PRESERVE: %{{.*}}, %[[ASSIGN_ADD_CTRL:.*]] = tf_executor.island wraps "tf.AssignAdd" +# PRESERVE: %[[IDENTITY_N:.*]]:3, %{{.*}} = tf_executor.island(%[[ASSIGN_ADD_CTRL]]) wraps "tf.IdentityN" +# PRESERVE: %[[ADD:.*]], %{{.*}} = tf_executor.island wraps "tf.Add"(%[[ARG_0]], %[[ARG_1]]) +# PRESERVE: tf_executor.fetch %[[IDENTITY_N]]#0, %[[ADD]] diff --git a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/switch_n.pbtxt b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/switch_n.pbtxt index d33ac2f3b5b..3dd5ce58ed2 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/switch_n.pbtxt +++ b/tensorflow/compiler/mlir/tensorflow/tests/graphdef2mlir/switch_n.pbtxt @@ -2,11 +2,11 @@ # CHECK: tf_executor.SwitchN # CHECK-SAME: of 3 : tensor -# CHECK-SAME: T = "tfdtype$DT_INT32" +# CHECK-SAME: T = i32 # CHECK-SAME: name = "Case/branch_index/_3" # CHECK: tf_executor.SwitchN # CHECK-SAME: of 2 : tensor -# CHECK-SAME: T = "tfdtype$DT_FLOAT" +# CHECK-SAME: T = f32 # CHECK-SAME: name = "Case/Case/input_0/_7" node { diff --git a/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir b/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir index 120e73f6e94..60ffc924ae5 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/lower_tf.mlir @@ -250,3 +250,19 @@ func @ZerosLike_variant(%arg0: tensor>>) -> tensor>>) -> tensor>> return %0 : tensor>> } + +// CHECK-LABEL: func @addN +func @addN(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>, %arg2: tensor<*xf32>) -> tensor<*xf32> { + // CHECK: %[[SUM0:.*]] = "tf.AddV2"(%arg0, %arg1) + // CHECK: %[[SUM1:.*]] = "tf.AddV2"(%[[SUM0]], %arg2) + // return %[[SUM1]] + %0 = "tf.AddN"(%arg0, %arg1, %arg2) : (tensor<*xf32>, tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32> + return %0 : tensor<*xf32> +} + +// CHECK-LABEL: func @addN_variant +func @addN_variant(%arg0: tensor>>, %arg1: tensor>>, %arg2: tensor>>) -> tensor>> { + // CHECK: tf.AddN + %0 = "tf.AddN"(%arg0, %arg1, %arg2) : (tensor>>, tensor>>, tensor>>) -> tensor>> + return %0 : tensor>> +} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/tf-legacy-call.mlir b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/tf-legacy-call.mlir new file mode 100644 index 00000000000..6c83b45295e --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/mlir2graphdef/tf-legacy-call.mlir @@ -0,0 +1,26 @@ +// RUN: tf-mlir-translate -mlir-to-graphdef %s -o - | FileCheck %s + +func @main() { + tf_executor.graph { + %outputs, %control = tf_executor.island wraps "tf.Const"() {device = "", dtype = "tfdtype$DT_INT32", name = "Constant", value = dense<0> : tensor} : () -> tensor + %outputs_0, %control_1 = tf_executor.island wraps "tf.LegacyCall"(%outputs) {f = @foo0} : (tensor) -> tensor + tf_executor.fetch + } + return +} +func @foo0(%arg0: tensor<*xi32>) -> tensor<*xi32> { + %0 = tf_executor.graph { + tf_executor.fetch %arg0 : tensor<*xi32> + } + return %0 : tensor<*xi32> +} + +// CHECK: node { +// CHECK: name: "_tf.LegacyCall" +// CHECK-NEXT: op: "foo0" + +// CHECK: library { +// CHECK-NEXT: function { +// CHECK-NEXT: signature { +// CHECK-NEXT: name: "foo0" + diff --git a/tensorflow/compiler/mlir/tensorflow/tests/resource-device-inference.mlir b/tensorflow/compiler/mlir/tensorflow/tests/resource-device-inference.mlir new file mode 100644 index 
00000000000..c98e40fed05 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/resource-device-inference.mlir @@ -0,0 +1,244 @@ +// RUN: tf-opt -split-input-file -verify-diagnostics -tf-resource-device-inference %s | FileCheck %s --dump-input=fail + +// Tests that the pass can correctly propagate device attributes inside the same +// function. + +// CHECK-LABEL: func @propagate_in_function +func @propagate_in_function( + %arg0: tensor<*x!tf.resource>> {tf.device = "/TPU:0"}, + %arg1: tensor<*x!tf.resource>> {tf.device = "/TPU:1"}) { + tf_executor.graph { + // CHECK: tf_executor.island + %island = tf_executor.island { + // CHECK-NEXT: "tf.VarHandleOp" + %var_handle = "tf.VarHandleOp"() {container = "c", shared_name = "v0", device = "/CPU:0"} + : () -> tensor<*x!tf.resource>> + // CHECK-NEXT: "tf.Identity" + // CHECK-SAME: {device = "/TPU:0"} + %id0 = "tf.Identity"(%arg0) : (tensor<*x!tf.resource>>) + -> tensor<*x!tf.resource>> + // CHECK-NEXT: "tf.Identity" + // CHECK-SAME: {device = "/TPU:0"} + %id1 = "tf.Identity"(%id0) : (tensor<*x!tf.resource>>) + -> tensor<*x!tf.resource>> + // CHECK-NEXT: "tf.Identity" + // CHECK-SAME: {device = "/CPU:0"} + %id2 = "tf.Identity"(%var_handle) : (tensor<*x!tf.resource>>) + -> tensor<*x!tf.resource>> + %read = "tf.ReadVariableOp"(%id2) : (tensor<*x!tf.resource>>) -> tensor<32xf32> + %id3 = "tf.Identity"(%read) : (tensor<32xf32>) -> tensor<32xf32> + tf_executor.yield + } + tf_executor.fetch %island : !tf_executor.control + } + return +} + +// ----- + +// Tesets that the pass can propagate through tf.If's branches. + +// CHECK-LABEL: func @propagate_if_op +func @propagate_if_op( + %arg0: tensor<*x!tf.resource>> {tf.device = "/TPU:0"}, + %arg1: tensor) { + tf_executor.graph { + // CHECK: tf_executor.island + %island = tf_executor.island { + // CHECK-NEXT: "tf.Identity" + // CHECK-SAME: {device = "/TPU:0"} + %id0 = "tf.Identity"(%arg0) : (tensor<*x!tf.resource>>) + -> tensor<*x!tf.resource>> + // CHECK-NEXT: "tf.VarHandleOp" + %var_handle = "tf.VarHandleOp"() {container = "c", shared_name = "v0", device = "/TPU:1"} + : () -> tensor<*x!tf.resource>> + // CHECK-NEXT: "tf.If" + "tf.If"(%arg1, %id0, %var_handle) { + then_branch = @if_then, + else_branch = @if_else, + output_shapes = [], is_stateless = false} + : (tensor, tensor<*x!tf.resource>>, + tensor<*x!tf.resource>>) -> () + tf_executor.yield + } + tf_executor.fetch %island : !tf_executor.control + } + return +} + +// CHECK-LABEL: func @if_then +func @if_then( + %arg0: tensor<*x!tf.resource>>, + %arg1: tensor<*x!tf.resource>>) { + tf_executor.graph { + // CHECK: tf_executor.island + %island = tf_executor.island { + // CHECK-NEXT: "tf.Identity" + // CHECK-SAME: {device = "/TPU:0"} + %id0 = "tf.Identity"(%arg0) : (tensor<*x!tf.resource>>) + -> tensor<*x!tf.resource>> + // CHECK-NEXT: "tf.Identity" + // CHECK-SAME: {device = "/TPU:1"} + %id1 = "tf.Identity"(%arg1) : (tensor<*x!tf.resource>>) + -> tensor<*x!tf.resource>> + tf_executor.yield + } + tf_executor.fetch %island : !tf_executor.control + } + return +} + +// CHECK-LABEL: func @if_else +func @if_else( + %arg0: tensor<*x!tf.resource>>, + %arg1: tensor<*x!tf.resource>>) { + tf_executor.graph { + // CHECK: tf_executor.island + %island = tf_executor.island { + // CHECK-NEXT: "tf.Identity" + // CHECK-SAME: {device = "/TPU:0"} + %id0 = "tf.Identity"(%arg0) : (tensor<*x!tf.resource>>) + -> tensor<*x!tf.resource>> + tf_executor.yield + } + tf_executor.fetch %island : !tf_executor.control + } + return +} + + +// ----- + +// Tesets that the pass can 
propagate through tf.While's branches. + +// CHECK-LABEL: func @propagate_while_op +func @propagate_while_op( + %arg0: tensor<*x!tf.resource>> {tf.device = "/TPU:0"}, + %arg1: tensor) { + tf_executor.graph { + // CHECK: tf_executor.island + %island = tf_executor.island { + // CHECK-NEXT: "tf.Identity" + // CHECK-SAME: {device = "/TPU:0"} + %id0 = "tf.Identity"(%arg0) : (tensor<*x!tf.resource>>) + -> tensor<*x!tf.resource>> + // CHECK-NEXT: "tf.VarHandleOp" + %var_handle = "tf.VarHandleOp"() {container = "c", shared_name = "v0", device = "/TPU:1"} + : () -> tensor<*x!tf.resource>> + // CHECK-NEXT: "tf.While" + "tf.While"(%arg1, %id0, %var_handle) { + body = @while_body, + cond = @while_cond, + output_shapes = [], is_stateless = false} + : (tensor, tensor<*x!tf.resource>>, + tensor<*x!tf.resource>>) -> + (tensor, tensor<*x!tf.resource>>, + tensor<*x!tf.resource>>) + tf_executor.yield + } + tf_executor.fetch %island : !tf_executor.control + } + return +} + +// CHECK-LABEL: func @while_body +func @while_body( + %arg0: tensor, + %arg1: tensor<*x!tf.resource>>, + %arg2: tensor<*x!tf.resource>>) -> + (tensor, tensor<*x!tf.resource>>, + tensor<*x!tf.resource>>) { + %graph:3 = tf_executor.graph { + // CHECK: tf_executor.island + %island:4 = tf_executor.island { + // CHECK-NEXT: "tf.Identity" + // CHECK-SAME: {device = "/TPU:0"} + %id0 = "tf.Identity"(%arg1) : (tensor<*x!tf.resource>>) + -> tensor<*x!tf.resource>> + // CHECK-NEXT: "tf.Identity" + // CHECK-SAME: {device = "/TPU:1"} + %id1 = "tf.Identity"(%arg2) : (tensor<*x!tf.resource>>) + -> tensor<*x!tf.resource>> + tf_executor.yield %arg0, %id0, %id1 + : tensor, tensor<*x!tf.resource>>, + tensor<*x!tf.resource>> + } + tf_executor.fetch %island#0, %island#1, %island#2 + : tensor, tensor<*x!tf.resource>>, + tensor<*x!tf.resource>> + } + return %graph#0, %graph#1, %graph#2 + : tensor, tensor<*x!tf.resource>>, + tensor<*x!tf.resource>> +} + +// CHECK-LABEL: func @while_cond +func @while_cond( + %arg0: tensor, + %arg1: tensor<*x!tf.resource>>, + %arg2: tensor<*x!tf.resource>>) -> tensor<32xf32> { + %graph = tf_executor.graph { + // CHECK: tf_executor.island + %island:2 = tf_executor.island { + // CHECK-NEXT: "tf.Identity" + // CHECK-SAME: {device = "/TPU:0"} + %id0 = "tf.Identity"(%arg1) : (tensor<*x!tf.resource>>) + -> tensor<*x!tf.resource>> + %read = "tf.ReadVariableOp"(%id0) + : (tensor<*x!tf.resource>>) -> tensor<32xf32> + tf_executor.yield %read : tensor<32xf32> + } + tf_executor.fetch %island#0 : tensor<32xf32> + } + return %graph : tensor<32xf32> +} + +// ----- + +// Tesets that the pass reports error on conflicting assignments from multiple +// callers. 
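+// In the test below, the same branch function @if_then_and_else is reached
+// from two tf.If ops whose resource operands are swapped, so one call site
+// implies /TPU:0 for an argument while the other implies /TPU:1; the pass is
+// expected to reject this with a "Conflicting device assignment" error.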
+ +func @error_on_conflict_multiple_callers( + %arg0: tensor<*x!tf.resource>> {tf.device = "/TPU:0"}, + %arg1: tensor) { + tf_executor.graph { + %island = tf_executor.island { + %id0 = "tf.Identity"(%arg0) : (tensor<*x!tf.resource>>) + -> tensor<*x!tf.resource>> + %var_handle = "tf.VarHandleOp"() {container = "c", shared_name = "v0", device = "/TPU:1"} + : () -> tensor<*x!tf.resource>> + "tf.If"(%arg1, %id0, %var_handle) { + then_branch = @if_then_and_else, + else_branch = @if_then_and_else, + output_shapes = [], is_stateless = false} + : (tensor, tensor<*x!tf.resource>>, + tensor<*x!tf.resource>>) -> () + "tf.If"(%arg1, %var_handle, %id0) { + // expected-error@above {{Conflicting device assignment for resource}} + then_branch = @if_then_and_else, + else_branch = @if_then_and_else, + output_shapes = [], is_stateless = false} + : (tensor, tensor<*x!tf.resource>>, + tensor<*x!tf.resource>>) -> () + tf_executor.yield + } + tf_executor.fetch %island : !tf_executor.control + } + return +} + +func @if_then_and_else( + %arg0: tensor<*x!tf.resource>>, + %arg1: tensor<*x!tf.resource>>) { + tf_executor.graph { + %island = tf_executor.island { + %id0 = "tf.Identity"(%arg0) : (tensor<*x!tf.resource>>) + -> tensor<*x!tf.resource>> + %id1 = "tf.Identity"(%arg1) : (tensor<*x!tf.resource>>) + -> tensor<*x!tf.resource>> + tf_executor.yield + } + tf_executor.fetch %island : !tf_executor.control + } + return +} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir b/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir index 8ff72dbc7fc..e5905e5f681 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/resource_op_lifting.mlir @@ -8,7 +8,7 @@ func @only_resource_load() -> tensor<*xi32> { // CHECK: %[[RES_HANDLE:[0-9]*]] = "tf.VarHandleOp" %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource> - // CHECK: %[[RES_READ_VAL:[0-9]*]] = "tf.ReadVariableOp"(%[[RES_HANDLE]]) {dtype = "tfdtype$DT_INT32"} + // CHECK: %[[RES_READ_VAL:[0-9]*]] = "tf.ReadVariableOp"(%[[RES_HANDLE]]) {dtype = i32} // CHECK: "tf_device.launch" // CHECK: %[[COMPUTE_RES:[0-9]*]] = "tf.SomeComputation"(%[[RES_READ_VAL]]) // CHECK: tf_device.return %[[COMPUTE_RES]] @@ -16,7 +16,7 @@ func @only_resource_load() -> tensor<*xi32> { // CHECK-SAME: () -> tensor<*xi32> %1 = "tf_device.launch"() ( { - %2 = "tf.ReadVariableOp"(%0) {dtype = "tfdtype$DT_INT32"} : (tensor<*x!tf.resource>) -> tensor<*xi32> + %2 = "tf.ReadVariableOp"(%0) {dtype = i32} : (tensor<*x!tf.resource>) -> tensor<*xi32> %3 = "tf.SomeComputation"(%2) : (tensor<*xi32>) -> (tensor<*xi32>) tf_device.return %3 : tensor<*xi32> }) {device = "tpu0", launch_attr = "launch_attr"} : () -> tensor<*xi32> @@ -39,11 +39,11 @@ func @only_resource_store() -> tensor<*xi32> { // CHECK: tf_device.return %[[COMPUTE_RES]], %[[COMPUTE_RES]] // CHECK: {device = "tpu0", launch_attr = "launch_attr"} // CHECK-SAME: () -> (tensor<*xi32>, tensor<*xi32>) - // CHECK: "tf.AssignVariableOp"(%[[RES_HANDLE]], %[[LAUNCH_RES]]#1) {dtype = "tfdtype$DT_INT32"} + // CHECK: "tf.AssignVariableOp"(%[[RES_HANDLE]], %[[LAUNCH_RES]]#1) {dtype = i32} %1 = "tf_device.launch"() ( { %2 = "tf.SomeComputation"() : () -> (tensor<*xi32>) - "tf.AssignVariableOp"(%0, %2) {dtype = "tfdtype$DT_INT32"} : (tensor<*x!tf.resource>, tensor<*xi32>) -> () + "tf.AssignVariableOp"(%0, %2) {dtype = i32} : (tensor<*x!tf.resource>, tensor<*xi32>) -> () tf_device.return %2 : tensor<*xi32> }) {device = 
"tpu0", launch_attr = "launch_attr"} : () -> tensor<*xi32> @@ -61,18 +61,18 @@ func @same_resource_load_and_store() -> tensor<*xi32> { // CHECK: %[[RES_HANDLE:[0-9]*]] = "tf.VarHandleOp" %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource> - // CHECK: %[[RES_READ_VAL:[0-9]*]] = "tf.ReadVariableOp"(%[[RES_HANDLE]]) {dtype = "tfdtype$DT_INT32"} + // CHECK: %[[RES_READ_VAL:[0-9]*]] = "tf.ReadVariableOp"(%[[RES_HANDLE]]) {dtype = i32} // CHECK: %[[LAUNCH_RES:[0-9]*]]:2 = "tf_device.launch" // CHECK: %[[COMPUTE_RES:[0-9]*]] = "tf.SomeComputation"(%[[RES_READ_VAL]]) // CHECK: tf_device.return %[[COMPUTE_RES]], %[[COMPUTE_RES]] // CHECK: {device = "tpu0", launch_attr = "launch_attr"} // CHECK-SAME: () -> (tensor<*xi32>, tensor<*xi32>) - // CHECK: "tf.AssignVariableOp"(%[[RES_HANDLE]], %[[LAUNCH_RES]]#1) {dtype = "tfdtype$DT_INT32"} + // CHECK: "tf.AssignVariableOp"(%[[RES_HANDLE]], %[[LAUNCH_RES]]#1) {dtype = i32} %1 = "tf_device.launch"() ( { - %2 = "tf.ReadVariableOp"(%0) {dtype = "tfdtype$DT_INT32"} : (tensor<*x!tf.resource>) -> tensor<*xi32> + %2 = "tf.ReadVariableOp"(%0) {dtype = i32} : (tensor<*x!tf.resource>) -> tensor<*xi32> %3 = "tf.SomeComputation"(%2) : (tensor<*xi32>) -> (tensor<*xi32>) - "tf.AssignVariableOp"(%0, %3) {dtype = "tfdtype$DT_INT32"} : (tensor<*x!tf.resource>, tensor<*xi32>) -> () + "tf.AssignVariableOp"(%0, %3) {dtype = i32} : (tensor<*x!tf.resource>, tensor<*xi32>) -> () tf_device.return %3 : tensor<*xi32> }) {device = "tpu0", launch_attr = "launch_attr"} : () -> tensor<*xi32> @@ -82,96 +82,6 @@ func @same_resource_load_and_store() -> tensor<*xi32> { // ----- -// Tests that composite tf.AssignAddVariableOp operation is decomposed and -// hoisted. - -// CHECK-LABEL: func @decompose_assign_add_variable_op -func @decompose_assign_add_variable_op() -> tensor<*xi32> { - - // CHECK: %[[RES_HANDLE:[0-9]*]] = "tf.VarHandleOp" - %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource> - - // CHECK: %[[RES_READ_VAL:[0-9]*]] = "tf.ReadVariableOp"(%[[RES_HANDLE]]) {dtype = "tfdtype$DT_INT32"} - // CHECK: %[[LAUNCH_RES:[0-9]*]]:2 = "tf_device.launch" - // CHECK: %[[ONE:[0-9]*]] = "tf.Const"() {value = dense<1> : tensor} - // CHECK: %[[COMPUTE_RES:[0-9]*]] = "tf.AddV2"(%[[RES_READ_VAL]], %[[ONE]]) - // CHECK: tf_device.return %[[COMPUTE_RES]], %[[COMPUTE_RES]] - // CHECK: {device = "tpu0", launch_attr = "launch_attr"} - // CHECK-SAME: () -> (tensor<*xi32>, tensor<*xi32>) - // CHECK: "tf.AssignVariableOp"(%[[RES_HANDLE]], %[[LAUNCH_RES]]#1) {dtype = "tfdtype$DT_INT32"} - - %1 = "tf_device.launch"() ( { - %2 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - "tf.AssignAddVariableOp"(%0, %2) {dtype = "tfdtype$DT_INT32"} : (tensor<*x!tf.resource>, tensor) -> () - %3 = "tf.ReadVariableOp"(%0) {dtype = "tfdtype$DT_INT32"} : (tensor<*x!tf.resource>) -> tensor<*xi32> - tf_device.return %3 : tensor<*xi32> - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> tensor<*xi32> - - // CHECK: return %[[LAUNCH_RES]]#0 - return %1 : tensor<*xi32> -} - -// ----- - -// Tests that composite tf.AssignSubVariableOp operation is decomposed using -// SubOp. 
- -// CHECK-LABEL: func @decompose_assign_sub_variable_op -func @decompose_assign_sub_variable_op() -> tensor<*xi32> { - - %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource> - - // CHECK: %[[RES_READ_VAL:[0-9]*]] = "tf.ReadVariableOp" - // CHECK: %[[ONE:[0-9]*]] = "tf.Const"() {value = dense<1> : tensor} - // CHECK: "tf.Sub"(%[[RES_READ_VAL]], %[[ONE]]) - // CHECK: "tf.AssignVariableOp" - - %1 = "tf_device.launch"() ( { - %2 = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - "tf.AssignSubVariableOp"(%0, %2) {dtype = "tfdtype$DT_INT32"} : (tensor<*x!tf.resource>, tensor) -> () - %3 = "tf.ReadVariableOp"(%0) {dtype = "tfdtype$DT_INT32"} : (tensor<*x!tf.resource>) -> tensor<*xi32> - tf_device.return %3 : tensor<*xi32> - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> tensor<*xi32> - - return %1 : tensor<*xi32> -} - -// ----- - -// Tests that composite tf.ResourceApplyGradientDescent operation is decomposed -// and hoisted. - -// CHECK-LABEL: func @decompose_resource_apply_gradient_descent -func @decompose_resource_apply_gradient_descent() -> tensor<*xf32> { - - // CHECK: %[[RES_HANDLE:[0-9]*]] = "tf.VarHandleOp" - %0 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource> - - // CHECK: %[[RES_READ_VAL:[0-9]*]] = "tf.ReadVariableOp"(%[[RES_HANDLE]]) {dtype = "tfdtype$DT_FLOAT"} - // CHECK: %[[LAUNCH_RES:[0-9]*]]:2 = "tf_device.launch" - // CHECK: %[[ALPHA:[0-9]*]] = "tf.Const" - // CHECK: %[[DELTA:[0-9]*]] = "tf.Const" - // CHECK: %[[MUL:[0-9]*]] = "tf.Mul"(%[[ALPHA]], %[[DELTA]]) - // CHECK: %[[SUB:[0-9]*]] = "tf.Sub"(%[[RES_READ_VAL]], %[[MUL]]) - // CHECK: tf_device.return %[[SUB]], %[[SUB]] - // CHECK: {device = "tpu0", launch_attr = "launch_attr"} - // CHECK-SAME: () -> (tensor<*xf32>, tensor<*xf32>) - // CHECK: "tf.AssignVariableOp"(%[[RES_HANDLE]], %[[LAUNCH_RES]]#1) {dtype = "tfdtype$DT_FLOAT"} - - %1 = "tf_device.launch"() ( { - %2 = "tf.Const"() {T = "tfdtype$DT_FLOAT", value = dense<[1.0]> : tensor<1xf32>} : () -> tensor - %3 = "tf.Const"() {T = "tfdtype$DT_FLOAT", value = dense<[0.5]> : tensor<1xf32>} : () -> tensor - "tf.ResourceApplyGradientDescent"(%0, %2, %3) : (tensor<*x!tf.resource>, tensor, tensor) -> () - %4 = "tf.ReadVariableOp"(%0) {dtype = "tfdtype$DT_FLOAT"} : (tensor<*x!tf.resource>) -> tensor<*xf32> - tf_device.return %4 : tensor<*xf32> - }) {device = "tpu0", launch_attr = "launch_attr"} : () -> tensor<*xf32> - - // CHECK: return %[[LAUNCH_RES]]#0 - return %1 : tensor<*xf32> -} - -// ----- - // Tests that internal resource operations are not hoisted. 
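+// That is, the variable handle below is created inside the tf_device.launch
+// body, so its reads and writes are expected to stay inside the launch; only
+// accesses to resources defined outside the launch body are lifted out.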
// CHECK-LABEL: func @internal_resource @@ -184,13 +94,13 @@ func @internal_resource() -> tensor<*xi32> { %1 = "tf.VarHandleOp"() {container = "c", shared_name = "v"} : () -> tensor<*x!tf.resource> // CHECK: %[[RES_READ_VAL:[0-9]*]] = "tf.ReadVariableOp"(%[[RES_HANDLE]]) - %2 = "tf.ReadVariableOp"(%1) {dtype = "tfdtype$DT_INT32"} : (tensor<*x!tf.resource>) -> tensor<*xi32> + %2 = "tf.ReadVariableOp"(%1) {dtype = i32} : (tensor<*x!tf.resource>) -> tensor<*xi32> // CHECK: %[[COMPUTE_RES:[0-9]*]] = "tf.SomeComputation"(%[[RES_READ_VAL]]) %3 = "tf.SomeComputation"(%2) : (tensor<*xi32>) -> (tensor<*xi32>) // CHECK: "tf.AssignVariableOp"(%[[RES_HANDLE]], %[[COMPUTE_RES]]) - "tf.AssignVariableOp"(%1, %3) {dtype = "tfdtype$DT_INT32"} : (tensor<*x!tf.resource>, tensor<*xi32>) -> () + "tf.AssignVariableOp"(%1, %3) {dtype = i32} : (tensor<*x!tf.resource>, tensor<*xi32>) -> () // CHECK: tf_device.return %[[COMPUTE_RES]] tf_device.return %3 : tensor<*xi32> diff --git a/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir b/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir index acf236f8e1f..5a3c531023c 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/shape_inference.mlir @@ -1,6 +1,17 @@ // RUN: tf-opt %s -tf-shape-inference -verify-diagnostics | FileCheck %s -dump-input=fail -color module attributes {tf.versions = {bad_consumers = [], min_consumer = 0 : i32, producer = 130 : i32}} { +// CHECK-LABEL: func @main(%arg0: tensor<1xi32>, %arg1: tensor<1xi32>) -> tensor<1xi32> + func @main(%arg0: tensor<1xi32>, %arg1: tensor<1xi32>) -> tensor<*xi32> { + // CHECK: %[[ARG0:.*]] = "tf.Cast"(%arg0) : (tensor<1xi32>) -> tensor<1xi32> + // CHECK: %[[ARG1:.*]] = "tf.Cast"(%arg1) : (tensor<1xi32>) -> tensor<1xi32> + // CHECK: %[[RESULT:.*]] = "tf.AddV2"(%[[ARG0]], %[[ARG1]]) : (tensor<1xi32>, tensor<1xi32>) -> tensor<1xi32> + // CHECK: return %[[RESULT]] : tensor<1xi32> + %0 = "tf.Cast"(%arg0) : (tensor<1xi32>) -> tensor<*xi32> + %1 = "tf.Cast"(%arg1) : (tensor<1xi32>) -> tensor<*xi32> + %2 = "tf.AddV2"(%0, %1) : (tensor<*xi32>, tensor<*xi32>) -> tensor<*xi32> + return %2 : tensor<*xi32> + } // CHECK-LABEL: func @simple_chain func @simple_chain(%arg0: tensor<1xf32>) -> tensor<*xf32> { diff --git a/tensorflow/compiler/mlir/tensorflow/tests/side-effect-analysis-test.mlir b/tensorflow/compiler/mlir/tensorflow/tests/side-effect-analysis-test.mlir index c6eb4663e57..678c2373a1b 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/side-effect-analysis-test.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/side-effect-analysis-test.mlir @@ -6,18 +6,15 @@ // CHECK-LABEL: func @non_aliasing_reads_writes func @non_aliasing_reads_writes( // expected-remark@above {{ID: 13}} -// expected-remark@above {{Predecessors: {12}}} %arg0: tensor<*x!tf.resource>>, %arg1: tensor<*x!tf.resource>>, %arg2: tensor<32xf32>) -> (tensor<32xf32>) { %graph = tf_executor.graph { // expected-remark@above {{ID: 11}} - // expected-remark@above {{Predecessors: {10}}} // expected-remark@above {{Successors: {12}}} // CHECK: tf_executor.island %island:2 = tf_executor.island { // expected-remark@above {{ID: 9}} - // expected-remark@above {{Predecessors: {8}}} // expected-remark@above {{Successors: {10}}} %read0 = "tf.ReadVariableOp"(%arg0) : (tensor<*x!tf.resource>>) -> tensor<32xf32> // expected-remark@above {{ID: 0}} @@ -49,17 +46,14 @@ func @non_aliasing_reads_writes( tf_executor.yield %read3 : tensor<32xf32> // expected-remark@above {{ID: 8}} // 
expected-remark@above {{Predecessors: {4,5,7}}} - // expected-remark@above {{Successors: {9}}} } tf_executor.fetch %island#0 : tensor<32xf32> // expected-remark@above {{ID: 10}} // expected-remark@above {{Predecessors: {9}}} - // expected-remark@above {{Successors: {11}}} } return %graph : tensor<32xf32> // expected-remark@above {{ID: 12}} // expected-remark@above {{Predecessors: {11}}} - // expected-remark@above {{Successors: {13}}} } // ----- @@ -70,15 +64,12 @@ func @non_aliasing_reads_writes( // CHECK-LABEL: func @aliasing_reads_writes func @aliasing_reads_writes(%arg0: tensor<32xf32>) -> () { // expected-remark@above {{ID: 14}} -// expected-remark@above {{Predecessors: {13}}} tf_executor.graph { // expected-remark@above {{ID: 12}} - // expected-remark@above {{Predecessors: {11}}} // expected-remark@above {{Successors: {13}}} // CHECK: tf_executor.island %island = tf_executor.island { // expected-remark@above {{ID: 10}} - // expected-remark@above {{Predecessors: {9}}} // expected-remark@above {{Successors: {11}}} %vh0 = "tf.VarHandleOp"() {container = "c", shared_name = "v0"} : () -> tensor<*x!tf.resource>> // expected-remark@above {{ID: 0}} @@ -112,17 +103,14 @@ func @aliasing_reads_writes(%arg0: tensor<32xf32>) -> () { tf_executor.yield // expected-remark@above {{ID: 9}} // expected-remark@above {{Predecessors: {8}}} - // expected-remark@above {{Successors: {10}}} } tf_executor.fetch %island : !tf_executor.control // expected-remark@above {{ID: 11}} // expected-remark@above {{Predecessors: {10}}} - // expected-remark@above {{Successors: {12}}} } return // expected-remark@above {{ID: 13}} // expected-remark@above {{Predecessors: {12}}} - // expected-remark@above {{Successors: {14}}} } // ----- @@ -133,15 +121,12 @@ func @aliasing_reads_writes(%arg0: tensor<32xf32>) -> () { // CHECK-LABEL: func @unknown_side_effecting_op func @unknown_side_effecting_op(%arg0: tensor<32xf32>) -> () { // expected-remark@above {{ID: 13}} -// expected-remark@above {{Predecessors: {12}}} tf_executor.graph { // expected-remark@above {{ID: 11}} - // expected-remark@above {{Predecessors: {10}}} // expected-remark@above {{Successors: {12}}} // CHECK: tf_executor.island %island = tf_executor.island { // expected-remark@above {{ID: 9}} - // expected-remark@above {{Predecessors: {8}}} // expected-remark@above {{Successors: {10}}} %vh0 = "tf.VarHandleOp"() {container = "c", shared_name = "v0"} : () -> tensor<*x!tf.resource>> // expected-remark@above {{ID: 0}} @@ -172,17 +157,14 @@ func @unknown_side_effecting_op(%arg0: tensor<32xf32>) -> () { tf_executor.yield // expected-remark@above {{ID: 8}} // expected-remark@above {{Predecessors: {6,7}}} - // expected-remark@above {{Successors: {9}}} } tf_executor.fetch %island : !tf_executor.control // expected-remark@above {{ID: 10}} // expected-remark@above {{Predecessors: {9}}} - // expected-remark@above {{Successors: {11}}} } return // expected-remark@above {{ID: 12}} // expected-remark@above {{Predecessors: {11}}} - // expected-remark@above {{Successors: {13}}} } // ----- @@ -193,15 +175,12 @@ func @unknown_side_effecting_op(%arg0: tensor<32xf32>) -> () { // CHECK-LABEL: func @read_only_unknown_resource func @read_only_unknown_resource(%arg0: tensor<32xf32>) -> () { // expected-remark@above {{ID: 10}} -// expected-remark@above {{Predecessors: {9}}} tf_executor.graph { // expected-remark@above {{ID: 8}} - // expected-remark@above {{Predecessors: {7}}} // expected-remark@above {{Successors: {9}}} // CHECK: tf_executor.island %island = tf_executor.island { // 
expected-remark@above {{ID: 6}} - // expected-remark@above {{Predecessors: {5}}} // expected-remark@above {{Successors: {7}}} %vh0 = "tf._UnknownSideEffectingOp_"() : () -> tensor<*x!tf.resource>> // expected-remark@above {{ID: 0}} @@ -223,15 +202,71 @@ func @read_only_unknown_resource(%arg0: tensor<32xf32>) -> () { tf_executor.yield // expected-remark@above {{ID: 5}} // expected-remark@above {{Predecessors: {4}}} - // expected-remark@above {{Successors: {6}}} } tf_executor.fetch %island : !tf_executor.control // expected-remark@above {{ID: 7}} // expected-remark@above {{Predecessors: {6}}} - // expected-remark@above {{Successors: {8}}} } return // expected-remark@above {{ID: 9}} // expected-remark@above {{Predecessors: {8}}} - // expected-remark@above {{Successors: {10}}} +} + +// ----- + +// Tests that the pass adds control dependencies in nested regions with +// tf_device.replicate + +func @with_replicate( + // expected-remark@above {{ID: 12}} + %arg0: tensor<*x!tf.resource>>, + %arg1: tensor<*x!tf.resource>>, + %arg2: tensor<*x!tf.resource>>, + %arg3: tensor<*x!tf.resource>>) { + tf_executor.graph { + // expected-remark@above {{ID: 10}} + // expected-remark@above {{Successors: {11}}} + %island = tf_executor.island { + // expected-remark@above {{ID: 8}} + // expected-remark@above {{Successors: {9}}} + %u0:2 = "tf._UnknownSideEffectingOp_"() : () -> (tensor<32xf32>, tensor<32xf32>) + // expected-remark@above {{ID: 0}} + // expected-remark@above {{Successors: {5}}} + tf_device.replicate( + // expected-remark@above {{ID: 5}} + // expected-remark@above {{Predecessors: {0}}} + // expected-remark@above {{Successors: {6}}} + [%arg0, %arg1] as %r0: tensor<*x!tf.resource>>, + [%arg2, %arg3] as %r1: tensor<*x!tf.resource>>, + [%u0#0, %u0#1] as %u : tensor<32xf32>) + {n = 2 : i32, devices = ["/CPU:0", "/GPU:1"]} { + %read0 = "tf.ReadVariableOp"(%r0) : (tensor<*x!tf.resource>>) -> tensor<32xf32> + // expected-remark@above {{ID: 1}} + // expected-remark@above {{Successors: {4}}} + "tf.AssignVariableOp"(%r1, %u) : (tensor<*x!tf.resource>>, tensor<32xf32>) -> () + // expected-remark@above {{ID: 2}} + // expected-remark@above {{Successors: {3}}} + %read1 = "tf.ReadVariableOp"(%r1) : (tensor<*x!tf.resource>>) -> tensor<32xf32> + // expected-remark@above {{ID: 3}} + // expected-remark@above {{Predecessors: {2}}} + // expected-remark@above {{Successors: {4}}} + tf_device.return + // expected-remark@above {{ID: 4}} + // expected-remark@above {{Predecessors: {1,3}}} + } + "tf._UnknownSideEffectingOp_"() : () -> () + // expected-remark@above {{ID: 6}} + // expected-remark@above {{Predecessors: {5}}} + // expected-remark@above {{Successors: {7}}} + tf_executor.yield + // expected-remark@above {{ID: 7}} + // expected-remark@above {{Predecessors: {6}}} + } + tf_executor.fetch %island : !tf_executor.control + // expected-remark@above {{ID: 9}} + // expected-remark@above {{Predecessors: {8}}} + } + return + // expected-remark@above {{ID: 11}} + // expected-remark@above {{Predecessors: {10}}} } diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir index e064c1a53ef..90aa6e73f79 100644 --- a/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf-ops.mlir @@ -1610,7 +1610,7 @@ func @testSplitUnknownDimInput(%input: tensor<4x?x4xf32>) { // ----- -func @testSplitNonConstSplitDim(%input: tensor<4x4xf32>, %split_dim: tensor<1xi32>) { +func @testSplitNonScalarSplitDim(%input: tensor<4x4xf32>, %split_dim: 
tensor<1xi32>) { // expected-error @+1 {{split dimension should be an integer scalar tensor}} %0:2 = "tf.Split"(%split_dim, %input) : (tensor<1xi32>, tensor<4x4xf32>) -> (tensor<*xf32>, tensor<*xf32>) return @@ -1674,3 +1674,152 @@ func @testTopKV2WrongKRank(%input: tensor<8xf32>, %k: tensor<5xi32>) { %0:2 = "tf.TopKV2"(%input, %k) : (tensor<8xf32>, tensor<5xi32>) -> (tensor<*xf32>, tensor<*xi32>) return } + +// ----- + +func @testSplitVScalarInput(%input: tensor, %split_sizes: tensor<2xi32>, %split_dim: tensor) { + // expected-error @+1 {{cannot split scalar input tensor}} + %0:2 = "tf.SplitV"(%input, %split_sizes, %split_dim) : (tensor, tensor<2xi32>, tensor) -> (tensor<*xf32>, tensor<*xf32>) + return +} + +// ----- + +func @testSplitVNonScalarSplitDim(%input: tensor<4x4xf32>, %split_sizes: tensor<2xi32>, %split_dim: tensor<1xi32>) { + // expected-error @+1 {{split dimension should be an integer scalar tensor}} + %0:2 = "tf.SplitV"(%input, %split_sizes, %split_dim) : (tensor<4x4xf32>, tensor<2xi32>, tensor<1xi32>) -> (tensor<*xf32>, tensor<*xf32>) + return +} + +// ----- + +func @testSplitVSplitDimOutOfRange(%input: tensor<4x4xf32>, %split_sizes: tensor<2xi32>) { + %split_dim = "tf.Const"() {value = dense<100>: tensor} : () -> (tensor) + // expected-error @+1 {{split dimension must be in range [-2, 2)}} + %0:2 = "tf.SplitV"(%input, %split_sizes, %split_dim) : (tensor<4x4xf32>, tensor<2xi32>, tensor) -> (tensor<*xf32>, tensor<*xf32>) + return +} + +// ----- + +func @testSplitVWrongSplitSizesType(%input: tensor<4x4xf32>, %split_sizes: tensor<2x2xi32>, %split_dim: tensor) { + // expected-error @+1 {{op split sizes should be a 1D tensor of 2 elements}} + %0:2 = "tf.SplitV"(%input, %split_sizes, %split_dim) : (tensor<4x4xf32>, tensor<2x2xi32>, tensor) -> (tensor<*xf32>, tensor<*xf32>) + return +} + +// ----- + +func @testSplitVMultipleDynamicSizes(%input: tensor<4x4xf32>) { + %split_dim = "tf.Const"() {value = dense<1>: tensor} : () -> (tensor) + %split_sizes = "tf.Const"() {value = dense<[-1, -1]>: tensor<2xi32>} : () -> (tensor<2xi32>) + // expected-error @+1 {{cannot have more than one dynamic dimension in split sizes}} + %0:2 = "tf.SplitV"(%input, %split_sizes, %split_dim) : (tensor<4x4xf32>, tensor<2xi32>, tensor) -> (tensor<*xf32>, tensor<*xf32>) + return +} + +// ----- + +func @testSplitVSplitSizeOutOfRange(%input: tensor<4x4xf32>) { + %split_dim = "tf.Const"() {value = dense<1>: tensor} : () -> (tensor) + %split_sizes = "tf.Const"() {value = dense<[-1, 100]>: tensor<2xi32>} : () -> (tensor<2xi32>) + // expected-error @+1 {{split sizes must sum up to be less than or equal to the dimension size along split dimension, found 100 vs 4}} + %0:2 = "tf.SplitV"(%input, %split_sizes, %split_dim) : (tensor<4x4xf32>, tensor<2xi32>, tensor) -> (tensor<*xf32>, tensor<*xf32>) + return +} + +// ----- + +func @testSplitVSplitSizeOutOfRange(%input: tensor<4x4xf32>) { + %split_dim = "tf.Const"() {value = dense<1>: tensor} : () -> (tensor) + %split_sizes = "tf.Const"() {value = dense<[2, 3]>: tensor<2xi32>} : () -> (tensor<2xi32>) + // expected-error @+1 {{split sizes must sum up to the dimension size along split dimension, found 5 vs 4}} + %0:2 = "tf.SplitV"(%input, %split_sizes, %split_dim) : (tensor<4x4xf32>, tensor<2xi32>, tensor) -> (tensor<*xf32>, tensor<*xf32>) + return +} + +// ----- + +func @testSplitV1(%input: tensor<4x4xf32>) { + %split_dim = "tf.Const"() {value = dense<1>: tensor} : () -> (tensor) + %split_sizes = "tf.Const"() {value = dense<[-1, 4]>: tensor<2xi32>} : () -> (tensor<2xi32>) + 
%0:2 = "tf.SplitV"(%input, %split_sizes, %split_dim) : (tensor<4x4xf32>, tensor<2xi32>, tensor) -> (tensor<*xf32>, tensor<*xf32>) + return +} + +func @testSplitV2(%input: tensor<4x4xf32>) { + %split_dim = "tf.Const"() {value = dense<1>: tensor} : () -> (tensor) + %split_sizes = "tf.Const"() {value = dense<[3, 1]>: tensor<2xi32>} : () -> (tensor<2xi32>) + %0:2 = "tf.SplitV"(%input, %split_sizes, %split_dim) : (tensor<4x4xf32>, tensor<2xi32>, tensor) -> (tensor<*xf32>, tensor<*xf32>) + return +} + +// ----- + +//===--------------------------------------------------------------------===// +// tf.All +//===--------------------------------------------------------------------===// + +func @testAllDimWrongRank(%input: tensor<4x6xi1>, %dims: tensor<2x2xi32>) { + // expected-error @+1 {{dimensions can only be 0D or 1D tensor}} + %0 = "tf.All"(%input, %dims) : (tensor<4x6xi1>, tensor<2x2xi32>) -> (tensor<*xi1>) + return +} + +// ----- + +func @testAllDimOutOfRange(%input: tensor<4x6xi1>) { + %dims = "tf.Const"() {value = dense<[-1, 5]> : tensor<2xi32>} : () -> (tensor<2xi32>) + // expected-error @+1 {{1-th dimension should be in the range of [-2, 2)}} + %0 = "tf.All"(%input, %dims) : (tensor<4x6xi1>, tensor<2xi32>) -> (tensor<*xi1>) + return +} + +// ----- + +//===--------------------------------------------------------------------===// +// tf.Any +//===--------------------------------------------------------------------===// + +func @testAnyDimWrongRank(%input: tensor<4x6xi1>, %dims: tensor<2x2xi32>) { + // expected-error @+1 {{dimensions can only be 0D or 1D tensor}} + %0 = "tf.Any"(%input, %dims) : (tensor<4x6xi1>, tensor<2x2xi32>) -> (tensor<*xi1>) + return +} + +// ----- + +func @testAnyDimOutOfRange(%input: tensor<4x6xi1>) { + %dims = "tf.Const"() {value = dense<[-1, 5]> : tensor<2xi32>} : () -> (tensor<2xi32>) + // expected-error @+1 {{1-th dimension should be in the range of [-2, 2)}} + %0 = "tf.Any"(%input, %dims) : (tensor<4x6xi1>, tensor<2xi32>) -> (tensor<*xi1>) + return +} + +// ----- + +//===--------------------------------------------------------------------===// +// tf.Unpack +//===--------------------------------------------------------------------===// + +func @testUnpackAxisOutOfRange(%input: tensor<2x6xf32>) { + // expected-error @+1 {{axis attribute must be in the range of [-2, 2)}} + %0:2 = "tf.Unpack"(%input) {axis = 5} : (tensor<2x6xf32>) -> (tensor<6xf32>, tensor<6xf32>) + return +} + +// ----- + +func @testAxisUnknownDim(%input: tensor) { + // CHECK: tf.Unpack + %0:2 = "tf.Unpack"(%input) {axis = 0} : (tensor) -> (tensor<6xf32>, tensor<6xf32>) + return +} + +// ----- + +func @testAxisDim(%input: tensor<2x6xf32>) { + // expected-error @+1 {{result count must be equal to 6}} + %0:2 = "tf.Unpack"(%input) {axis = -1} : (tensor<2x6xf32>) -> (tensor<6xf32>, tensor<6xf32>) + return +} diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/exported_python_args.py b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/exported_python_args.py new file mode 100644 index 00000000000..f73aa83a76c --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/tests/tf_saved_model/exported_python_args.py @@ -0,0 +1,41 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +# RUN: (! %p/exported_python_args 2>&1) | FileCheck %s + +# pylint: disable=missing-docstring,line-too-long,dangerous-default-value +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow.compat.v2 as tf +from tensorflow.compiler.mlir.tensorflow.tests.tf_saved_model import common + + +class TestModule(tf.Module): + + @tf.function(input_signature=[tf.TensorSpec([], tf.float32)]) + def some_function(self, x): + return self.callee(x) + + # CHECK: While importing SavedModel function 'callee': in input signature: + # CHECK-SAME: Unhandled structured value kind {{.*}} at index path: .1.foo + @tf.function + def callee(self, x, n={'foo': 42}): + return x + + +if __name__ == '__main__': + common.do_test(TestModule) diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc b/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc index a7f45c41f15..c08d17104ea 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc @@ -31,9 +31,14 @@ void CreateTPUBridge(OpPassManager &pm) { func_pm.addPass(tf_executor::CreateTFExecutorIslandCoarseningPass()); func_pm.addPass(CreateTPUClusterFormationPass()); func_pm.addPass(createCanonicalizerPass()); + // Place DecomposeResourceOpsPass before TFExecutorConstantSinking pass + // because DecomposeResourceOpsPass uses pattern rewriter which hoists + // changed constants out of tf_device.Launch. + func_pm.addPass(TFDevice::CreateDecomposeResourceOpsPass()); func_pm.addPass(tf_executor::CreateTFExecutorConstantSinkingPass()); func_pm.addPass(TFDevice::CreateResourceOpLiftingPass()); + pm.addPass(TF::CreateResourceDeviceInferencePass()); pm.addPass(TFDevice::CreateClusterOutliningPass()); pm.addPass(CreateTPURewritePass()); pm.addNestedPass(TFDevice::CreateReplicateInvariantOpHoistingPass()); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/cluster_outlining.cc b/tensorflow/compiler/mlir/tensorflow/transforms/cluster_outlining.cc index 7dab06124dc..10337df1a66 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/cluster_outlining.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/cluster_outlining.cc @@ -44,16 +44,16 @@ struct ClusterOutliningPass : public ModulePass { void ReplaceLaunchReturnWithReturn(tf_device::ReturnOp launch_return_op, OpBuilder* builder) { - llvm::SmallVector operands(launch_return_op.getOperands()); - builder->create(launch_return_op.getLoc(), operands); + builder->create(launch_return_op.getLoc(), + launch_return_op.getOperands()); launch_return_op.erase(); } // Builds a function that outlines region attached to launch_op and inserts // built function into given module. 
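// As an illustrative sketch of the overall outlining (operand and symbol names
// here are hypothetical, not the pass's verbatim output), a launch such as
//
//   %0 = "tf_device.launch"() ( {
//     %1 = "tf.A"(%arg0) : (tensor<i1>) -> tensor<i1>
//     tf_device.return %1 : tensor<i1>
//   }) {device = "tpu0"} : () -> tensor<i1>
//
// yields an outlined function
//
//   func @tpu0_func(%arg0: tensor<i1>) -> tensor<i1> {
//     %0 = "tf.A"(%arg0) : (tensor<i1>) -> tensor<i1>
//     return %0 : tensor<i1>
//   }
//
// and the launch itself is replaced by a `tf_device.launch_func` that invokes
// @tpu0_func with the live-in values (see OutlineLaunch below).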
FuncOp BuildFunction(StringRef device, llvm::ArrayRef live_ins, - tf_device::LaunchOp launch_op, - ModuleManager* module_manager, OpBuilder* builder) { + tf_device::LaunchOp launch_op, SymbolTable* symbol_table, + OpBuilder* builder) { llvm::SmallVector operand_types; operand_types.reserve(live_ins.size()); for (Value* v : live_ins) operand_types.emplace_back(v->getType()); @@ -92,14 +92,14 @@ FuncOp BuildFunction(StringRef device, llvm::ArrayRef live_ins, builder->setInsertionPoint(launch_return_op); ReplaceLaunchReturnWithReturn(launch_return_op, builder); - module_manager->insert(outlined_func); + symbol_table->insert(outlined_func); return outlined_func; } // Outlines body of `tf_device.launch` into a function and create a // `tf_device.launch_func` to invoke that function. `tf_device.launch` is // removed afterwards.` -void OutlineLaunch(tf_device::LaunchOp launch_op, ModuleManager* module_manager, +void OutlineLaunch(tf_device::LaunchOp launch_op, SymbolTable* symbol_table, OpBuilder* builder) { llvm::SetVector live_ins; getUsedValuesDefinedAbove(launch_op.body(), launch_op.body(), live_ins); @@ -108,7 +108,7 @@ void OutlineLaunch(tf_device::LaunchOp launch_op, ModuleManager* module_manager, launch_op.getAttrOfType(kDeviceAttr).getValue(); FuncOp outlined_func = BuildFunction(device, live_ins.getArrayRef(), - launch_op, module_manager, builder); + launch_op, symbol_table, builder); launch_op.setAttr(builder->getIdentifier(kFuncAttr), builder->getSymbolRefAttr(outlined_func.getName())); @@ -124,10 +124,10 @@ void OutlineLaunch(tf_device::LaunchOp launch_op, ModuleManager* module_manager, void ClusterOutliningPass::runOnModule() { ModuleOp m = getModule(); - ModuleManager module_manager(m); + SymbolTable symbol_table(m); OpBuilder builder(m.getContext()); m.walk([&](tf_device::LaunchOp launch) { - OutlineLaunch(launch, &module_manager, &builder); + OutlineLaunch(launch, &symbol_table, &builder); }); } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.cc b/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.cc new file mode 100644 index 00000000000..b70d14fd43b --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.cc @@ -0,0 +1,31 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.h" + +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" + +namespace mlir { +namespace TF { + +#include "tensorflow/compiler/mlir/tensorflow/transforms/generated_decompose_resource_ops.inc" + +void PopulateDecomposeResourceOpsPatterns(MLIRContext *context, + OwningRewritePatternList *patterns) { + populateWithGenerated(context, patterns); +} + +} // namespace TF +} // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.h b/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.h new file mode 100644 index 00000000000..813fc649059 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.h @@ -0,0 +1,34 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_DECOMPOSE_RESOURCE_OPS_H_ +#define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_DECOMPOSE_RESOURCE_OPS_H_ + +#include "mlir/IR/MLIRContext.h" // TF:local_config_mlir +#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir + +namespace mlir { +namespace TF { + +// Populates rewrite patterns that decompose composite resource operations into +// primitive ones like ReadVariableOp, AssignVariableOp and other computations +// to facilitate transformations like resource op lifting. +void PopulateDecomposeResourceOpsPatterns(MLIRContext *context, + OwningRewritePatternList *patterns); + +} // namespace TF +} // namespace mlir + +#endif // TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_DECOMPOSE_RESOURCE_OPS_H_ diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.td b/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.td new file mode 100644 index 00000000000..29c99cdc3d0 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.td @@ -0,0 +1,63 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +include "mlir/IR/OpBase.td" +include "mlir/Dialect/StandardOps/Ops.td" +include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.td" + +def CreateTFReadVariableOp: NativeCodeCall< + "$_builder.create(" + " $0.getLoc()," + " UnrankedTensorType::get(" + " $1->getType().cast().getElementType())," + " $2)" + >; + +def DecomposeAssignAddVariableOp : + Pat< + (TF_AssignAddVariableOp:$src_op $resource, $value), + (TF_AssignVariableOp + $resource, + (TF_AddV2Op + (CreateTFReadVariableOp $src_op, $value, $resource), + $value + ) + ) + >; + +def DecomposeAssignSubVariableOp : + Pat< + (TF_AssignSubVariableOp:$src_op $resource, $value), + (TF_AssignVariableOp + $resource, + (TF_SubOp + (CreateTFReadVariableOp $src_op, $value, $resource), + $value + ) + ) + >; + +// This decomposition is only correct inside XLA as it ignores use_locking +// attribute. +def DecomposeResourceApplyGradientDescentOp : + Pat< + (TF_ResourceApplyGradientDescentOp:$src_op $resource, $alpha, $delta, $_), + (TF_AssignVariableOp + $resource, + (TF_SubOp + (CreateTFReadVariableOp $src_op, $alpha, $resource), + (TF_MulOp $alpha, $delta) + ) + ) + >; diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops_pass.cc new file mode 100644 index 00000000000..b7be4ff8742 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops_pass.cc @@ -0,0 +1,59 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir +#include "mlir/Pass/Pass.h" // TF:local_config_mlir +#include "tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.h" + +namespace mlir { +namespace TFDevice { +namespace { + +// A pass that decomposes composite resource operations into primitive ones like +// ReadVariableOp, AssignVariableOp and other computations to facilitate +// transformations like resource op lifting. +// +// For example: +// +// tf.AssignAddVariableOp(%res, %0) +// +// Becomes +// +// %res_val = tf.ReadVariableOp(%res) +// %1 = tf.AddV2(%res_val, %0) +// tf.AssignVariableOp(%res, %1) +struct DecomposeResourceOps : public FunctionPass { + void runOnFunction() override { + // Add lowering patterns to the list. 
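+    // As a sketch of what the generated patterns are expected to produce
+    // (illustrative, types omitted; see decompose_resource_ops.td), a
+    // composite update such as
+    //
+    //   "tf.ResourceApplyGradientDescent"(%var, %alpha, %delta)
+    //
+    // is rewritten into primitive resource ops:
+    //
+    //   %prod = "tf.Mul"(%alpha, %delta)
+    //   %old  = "tf.ReadVariableOp"(%var)
+    //   %new  = "tf.Sub"(%old, %prod)
+    //   "tf.AssignVariableOp"(%var, %new)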
+ OwningRewritePatternList patterns; + mlir::TF::PopulateDecomposeResourceOpsPatterns(&getContext(), &patterns); + + applyPatternsGreedily(getFunction(), patterns); + } +}; + +} // namespace + +std::unique_ptr> CreateDecomposeResourceOpsPass() { + return std::make_unique(); +} + +} // namespace TFDevice +} // namespace mlir + +static mlir::PassRegistration pass( + "tf-device-decompose-resource-ops", + "Decompose composite resource variable operations into primitive " + "Read/AssignVariableOp and raw computation"); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/executor_island_coarsening.cc b/tensorflow/compiler/mlir/tensorflow/transforms/executor_island_coarsening.cc index c6958d992f1..918e6ac3078 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/executor_island_coarsening.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/executor_island_coarsening.cc @@ -304,8 +304,7 @@ void InsertDummyIslandForFetch(FetchOp fetch) { /*control=*/ControlType::get(fetch.getContext()), /*controlInputs=*/control_fetches); island.body().push_back(new Block); - OpBuilder(&island.GetBody()) - .create(fetch.getLoc(), llvm::to_vector<4>(data_fetches)); + OpBuilder(&island.GetBody()).create(fetch.getLoc(), data_fetches); const int fetch_control_idx = data_fetches.size(); for (int i = 0, e = fetch.getNumOperands(); i < e; i++) { // The fetch could have multiple control operands (all at the end of its diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_cfg.cc b/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_cfg.cc index 880b4c4210b..e9b3879c025 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_cfg.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_cfg.cc @@ -17,6 +17,7 @@ limitations under the License. // standard TensorFlow dialect to MLIR Control Flow Graph (CFG) form. 
#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir +#include "mlir/IR/Attributes.h" // TF:local_config_mlir #include "mlir/IR/Builders.h" // TF:local_config_mlir #include "mlir/IR/Operation.h" // TF:local_config_mlir #include "mlir/IR/TypeUtilities.h" // TF:local_config_mlir @@ -79,8 +80,11 @@ static Operation* CallFn(Location loc, for (int i = 0; i < num_operands; ++i) { Value* val = get_arg(i); Type expected = fn_type.getInput(i); - if (val->getType() != expected) - val = builder->create(loc, val, expected); + if (val->getType() != expected) { + val = + builder->create(loc, expected, val, + /*Truncate=*/builder->getBoolAttr(false)); + } operands.push_back(val); } return builder->create(loc, fn, operands).getOperation(); @@ -100,8 +104,11 @@ static llvm::SmallVector PrepareValsForJump( for (int i = 0; i < num_vals; ++i) { Value* val = get_val(i); Type expected = block->getArgument(i)->getType(); - if (val->getType() != expected) - val = builder->create(loc, val, expected); + if (val->getType() != expected) { + val = + builder->create(loc, expected, val, + /*Truncate=*/builder->getBoolAttr(false)); + } result.push_back(val); } return result; @@ -131,8 +138,11 @@ static void ReplaceOpResultWithBlockArgs(Location loc, Operation* op, for (unsigned i = 0, e = op->getNumResults(); i != e; ++i) { Value* arg = block->getArgument(i); Value* result = op->getResult(i); - if (arg->getType() != result->getType()) - arg = builder->create(loc, arg, result->getType()); + if (arg->getType() != result->getType()) { + arg = + builder->create(loc, result->getType(), arg, + /*Truncate=*/builder->getBoolAttr(false)); + } result->replaceAllUsesWith(arg); } } @@ -301,26 +311,15 @@ void FunctionalControlFlowToCFG::runOnFunction() { // subsequent blocks. // // TODO: Use PatternRewriter to eliminate these function control flow ops. - auto has_variant_operand = [](Operation* op) { - auto is_variant = [](Type ty) { - return getElementTypeOrSelf(ty).getKind() == TensorFlowTypes::VARIANT; - }; - - if (llvm::none_of(op->getOperandTypes(), is_variant)) return false; - - op->emitOpError() << "does not yet support operands of type variant " - "for conversion to CFG"; - return true; - }; if (IfOp if_op = llvm::dyn_cast(op)) { - if (has_variant_operand(&op) || failed(LowerIfOp(if_op))) { + if (failed(LowerIfOp(if_op))) { return signalPassFailure(); } break; } if (WhileOp while_op = llvm::dyn_cast(op)) { - if (has_variant_operand(&op) || failed(LowerWhileOp(while_op))) { + if (failed(LowerWhileOp(while_op))) { return signalPassFailure(); } break; diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc index 65c6ac86288..89941c2fab4 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc @@ -24,6 +24,7 @@ limitations under the License. #include "mlir/IR/StandardTypes.h" // TF:local_config_mlir #include "mlir/IR/TypeUtilities.h" // TF:local_config_mlir #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" #include "tensorflow/core/util/tensor_format.h" namespace mlir { @@ -109,6 +110,39 @@ Type InferExpandDimsType(Type ty, int64_t axis, Builder *builder) { return RankedTensorType::get(shape, ranked_ty.getElementType()); } +// Lowers AddN op to a sequence of AddV2 ops to accumulate operands. 
+// +// %result = "tf.AddN"(%0, %1, %2) +// +// is lowered to: +// +// %sum_0 = "tf.AddV2"(%0, %1) +// %result = "tf.AddV2"(%sum_0, %2) +// +class LowerAddNOp : public OpRewritePattern { + public: + explicit LowerAddNOp(MLIRContext *context) + : OpRewritePattern(context) {} + + PatternMatchResult matchAndRewrite(TF::AddNOp op, + PatternRewriter &rewriter) const override { + // TODO(hinsu): Support variant with TensorList type. tf.AddV2 doesn't + // support variant type so variant types require special handling. + if (getElementTypeOrSelf(op.getType()).isa()) + return matchFailure(); + + // TODO(hinsu): Improve parallelism by splitting operands in two halves and + // accumulating them first. + Value *result = *op.inputs().begin(); + for (Value *operand : llvm::drop_begin(op.inputs(), 1)) { + result = rewriter.create(op.getLoc(), result, operand); + } + + rewriter.replaceOp(op, result); + return matchSuccess(); + } +}; + // Lowers Pack op to ConcatV2 op after changing shape of the inputs with // ExpandDims op. // @@ -159,6 +193,7 @@ class LowerPackOp : public OpRewritePattern { void PopulateLoweringTFPatterns(MLIRContext *context, OwningRewritePatternList *patterns) { + patterns->insert(context); patterns->insert(context); populateWithGenerated(context, patterns); } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/optimize.cc b/tensorflow/compiler/mlir/tensorflow/transforms/optimize.cc index b0420663bde..6e28b19ad80 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/optimize.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/optimize.cc @@ -63,7 +63,7 @@ void CreateTFStandardPipeline(OpPassManager &pm, if (options.enable_inliner) { pm.addPass(createInlinerPass()); } - pm.addNestedPass(CreateTFShapeInferencePass()); + pm.addPass(CreateTFShapeInferencePass()); pm.addNestedPass(CreateTFOptimizePass()); pm.addNestedPass(createCSEPass()); } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h index 30ee91f4aea..fca1c02bc62 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h +++ b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h @@ -57,6 +57,9 @@ struct StandardPipelineOptions : public PassOptions { // NOLINTNEXTLINE - MLIR contract is pass by mutable reference. void CreateTFStandardPipeline(OpPassManager& pm, const StandardPipelineOptions& options); + +// Propagates device attributes of resources from callers to callees. +std::unique_ptr> CreateResourceDeviceInferencePass(); } // namespace TF namespace TFControlFlow { @@ -96,6 +99,11 @@ std::unique_ptr> CreateClusterFormationPass(); // Creates a pass that outlines regions of tf_device.launch operations. std::unique_ptr> CreateClusterOutliningPass(); +// A pass that decomposes composite resource operations into primitive ones like +// ReadVariableOp, AssignVariableOp and other computations to facilitate +// transformations like resource op lifting. 
+std::unique_ptr> CreateDecomposeResourceOpsPass(); + // Creates a pass that lifts operations on external resource variables from // device computation nested in `tf_device::LaunchOp` out so that resource // variable load operations are all before device computation while resource diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/replicate_to_island.cc b/tensorflow/compiler/mlir/tensorflow/transforms/replicate_to_island.cc index 8033773cfaa..9787ac0f0f0 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/replicate_to_island.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/replicate_to_island.cc @@ -64,8 +64,8 @@ llvm::SmallVector ExpandReplicateIntoReplicas( // Replace replicate terminator with YieldOp. builder->setInsertionPoint(&terminator); - builder->create( - terminator.getLoc(), llvm::to_vector<8>(terminator.getOperands())); + builder->create(terminator.getLoc(), + terminator.getOperands()); terminator.erase(); builder->setInsertionPoint(island_op); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/resource_device_inference.cc b/tensorflow/compiler/mlir/tensorflow/transforms/resource_device_inference.cc new file mode 100644 index 00000000000..616c2cb10e8 --- /dev/null +++ b/tensorflow/compiler/mlir/tensorflow/transforms/resource_device_inference.cc @@ -0,0 +1,278 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include +#include + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Support/Casting.h" +#include "mlir/IR/Attributes.h" // TF:local_config_mlir +#include "mlir/IR/Builders.h" // TF:local_config_mlir +#include "mlir/IR/Function.h" // TF:local_config_mlir +#include "mlir/IR/Operation.h" // TF:local_config_mlir +#include "mlir/IR/Types.h" // TF:local_config_mlir +#include "mlir/IR/Value.h" // TF:local_config_mlir +#include "mlir/IR/Visitors.h" // TF:local_config_mlir +#include "mlir/Pass/Pass.h" // TF:local_config_mlir +#include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir +#include "mlir/Support/LogicalResult.h" // TF:local_config_mlir +#include "tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" + +namespace mlir { +namespace TF { + +namespace { +constexpr char kDeviceAttr[] = "device"; +constexpr char kFuncDeviceAttr[] = "tf.device"; + +// A pass that propagates device assignment of resources on a module. It +// performs in-function propagation, as well as cross-function propagation from +// callers to callees. +// +// This pass changes the module by adding "tf.device" attribute to function +// arguments and adding "device" attribute to TF ops. 
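+//
+// As an illustrative sketch (types abbreviated, not verbatim output): given
+//
+//   %var = "tf.VarHandleOp"() {device = "/CPU:0", shared_name = "v", ...}
+//       : () -> tensor<*x!tf.resource<tensor<f32>>>
+//   %id = "tf.Identity"(%var) : (...) -> (...)
+//
+// the pass records "/CPU:0" for the resource defined by the VarHandleOp and
+// annotates the Identity with {device = "/CPU:0"}; when that resource is later
+// passed to a callee (e.g. the body of a tf.While), the matching function
+// argument receives a "tf.device" = "/CPU:0" argument attribute.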
+struct ResourceDeviceInference : public ModulePass { + void runOnModule() override; +}; + +// A class that records each resource's device assignment in a function. +class PerFunctionResult { + public: + explicit PerFunctionResult(FuncOp func_op) : alias_analysis_(func_op) {} + + // Returns the recorded device assignment for a resource, if any. + llvm::Optional DeviceForResource( + const Value* resource) const { + llvm::Optional result; + if (alias_analysis_.IsUnknownResource(resource)) return result; + for (int64_t id : alias_analysis_.GetResourceUniqueIds(resource)) { + auto it = resource_id_to_device_.find(id); + if (it == resource_id_to_device_.end()) continue; + if (!result) { + result = it->getSecond(); + continue; + } + if (result != it->getSecond()) { + // Got conflicting assignments, clear the result. + result.reset(); + return result; + } + } + return result; + } + + // Records the device assignment for a resource. If the new assignment + // conflicts with an existing one, returns an error. + // + // If `changed` is provided, assign *changed to true if anything is modified. + LogicalResult AddResourceDevice(const Value* resource, llvm::StringRef device, + bool* changed = nullptr) { + if (alias_analysis_.IsUnknownResource(resource)) return success(); + for (int64_t id : alias_analysis_.GetResourceUniqueIds(resource)) { + auto emplace_res = resource_id_to_device_.try_emplace(id, device); + if (emplace_res.second) { + if (changed) *changed = true; + } else if (emplace_res.first->getSecond() != device) { + // Existing assignment does not equal the new assignment. + return failure(); + } + } + return success(); + } + + private: + llvm::SmallDenseMap resource_id_to_device_; + TF::ResourceAliasAnalysis alias_analysis_; +}; + +// Tries to record device assignment for a resource. +LogicalResult AddResourceDeviceAndEmitError(const Value* resource, + llvm::StringRef device, + Operation* error_reporting_op, + PerFunctionResult* result, + bool* changed = nullptr) { + auto res = result->AddResourceDevice(resource, device, changed); + if (failed(res)) { + error_reporting_op->emitError() + << "Conflicting device assignment for resource"; + } + return res; +} + +// Propagates device assignment inside a function. +LogicalResult ComputeResourceDevicesInComputation(FuncOp func_op, + PerFunctionResult* result) { + OpBuilder builder(func_op); + // Function arguments. + for (auto arg : func_op.getArguments()) { + if (!mlir::getElementTypeOrSelf(arg->getType()).isa()) { + continue; + } + auto device_attr = func_op.getArgAttrOfType( + arg->getArgNumber(), kFuncDeviceAttr); + if (!device_attr || device_attr.getValue() == "") { + // If device_attr does not exist, try to construct it from any recorded + // assignment. + if (auto device = result->DeviceForResource(arg)) { + func_op.setArgAttr(arg->getArgNumber(), kFuncDeviceAttr, + builder.getStringAttr(*device)); + } + continue; + } + // Record the attribute. + auto res = AddResourceDeviceAndEmitError(arg, device_attr.getValue(), + func_op, result); + if (failed(res)) return res; + } + auto walk_res = func_op.walk([&](Operation* op) { + if (auto var_handle = llvm::dyn_cast(op)) { + // Record VarHanldeOp's device attribute. 
+ auto device_attr = + var_handle.getAttrOfType(kDeviceAttr); + if (!device_attr || device_attr.getValue().empty()) { + return WalkResult::advance(); + } + auto res = AddResourceDeviceAndEmitError( + var_handle.resource(), device_attr.getValue(), op, result); + if (failed(res)) return WalkResult::interrupt(); + } + if (auto identity = llvm::dyn_cast(op)) { + // Try to construct IdentityOp's attribute from recorded assignment. + if (!mlir::getElementTypeOrSelf(identity.output()->getType()) + .isa()) { + return WalkResult::advance(); + } + if (auto device = result->DeviceForResource(identity.output())) { + auto device_attr = + identity.getAttrOfType(kDeviceAttr); + if (!device_attr || device_attr.getValue().empty()) { + identity.setAttr(kDeviceAttr, builder.getStringAttr(*device)); + } + } + return WalkResult::advance(); + } + // Propagate and record output device assignment for other ops based on + // existing recording. E.g., IdentityN. + for (auto output : op->getResults()) { + if (!mlir::getElementTypeOrSelf(output->getType()) + .isa()) { + continue; + } + if (auto device = result->DeviceForResource(output)) { + auto res = AddResourceDeviceAndEmitError(output, *device, op, result); + if (failed(res)) return WalkResult::interrupt(); + } + } + return WalkResult::advance(); + }); + return failure(walk_res.wasInterrupted()); +} + +void ResourceDeviceInference::runOnModule() { + auto module = getModule(); + llvm::SmallDenseMap per_function_results; + llvm::SetVector worklist; + module.walk([&](FuncOp func_op) { + worklist.insert(func_op); + per_function_results.try_emplace(func_op, func_op); + }); + // Helper that propagates an op's recorded operand device assignments to its + // called function's arguments. + auto propagate_operands_to_callee_arguments = + [&](Operation* caller, + llvm::iterator_range caller_operands, + llvm::StringRef called_func_name, + const PerFunctionResult& caller_res) { + auto callee = + llvm::dyn_cast(module.lookupSymbol(called_func_name)); + assert(callee); + auto& callee_res = per_function_results.find(callee)->getSecond(); + bool callee_needs_recompute = false; + for (auto operand_and_argument : + llvm::zip(caller_operands, callee.getArguments())) { + if (!mlir::getElementTypeOrSelf( + std::get<0>(operand_and_argument)->getType()) + .isa()) { + continue; + } + auto device = + caller_res.DeviceForResource(std::get<0>(operand_and_argument)); + if (!device) continue; + if (failed(AddResourceDeviceAndEmitError( + std::get<1>(operand_and_argument), *device, caller, + &callee_res, &callee_needs_recompute))) { + return failure(); + } + } + // If the callee recording is modified, make sure that it will be + // reprocessed. + if (callee_needs_recompute) { + worklist.insert(callee); + } + return success(); + }; + while (!worklist.empty()) { + auto func_op = worklist.back(); + worklist.pop_back(); + auto& func_res = per_function_results.find(func_op)->getSecond(); + // In-function propagation. + if (failed(ComputeResourceDevicesInComputation(func_op, &func_res))) { + return signalPassFailure(); + } + // Propagation to callees. 
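+    // For example (sketch): for
+    //
+    //   %w:2 = "tf.While"(%resource, %x) {body = @body, cond = @cond, ...}
+    //
+    // the device recorded for %resource in this function is copied onto the
+    // corresponding argument of @body and @cond, and those callees are put
+    // back on the worklist if their recording changed.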
+ auto walk_res = func_op.walk([&](Operation* op) { + if (auto while_op = llvm::dyn_cast(op)) { + if (failed(propagate_operands_to_callee_arguments( + while_op, while_op.getOperands(), while_op.body(), func_res)) || + failed(propagate_operands_to_callee_arguments( + while_op, while_op.getOperands(), while_op.cond(), func_res))) { + return WalkResult::interrupt(); + } + } else if (auto if_op = llvm::dyn_cast(op)) { + if (failed(propagate_operands_to_callee_arguments( + if_op, if_op.input(), if_op.then_branch(), func_res)) || + failed(propagate_operands_to_callee_arguments( + if_op, if_op.input(), if_op.else_branch(), func_res))) { + return WalkResult::interrupt(); + } + } + return WalkResult::advance(); + }); + if (walk_res.wasInterrupted()) return signalPassFailure(); + } +} + +} // namespace + +std::unique_ptr> CreateResourceDeviceInferencePass() { + return std::make_unique(); +} + +static PassRegistration pass( + "tf-resource-device-inference", + "Propagates the device attribute on resources from callers to callees."); + +} // namespace TF +} // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc index 7aa5c19fead..2f32a3a2c28 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc @@ -77,129 +77,6 @@ struct ResourceOpLiftingPass : public FunctionPass { void runOnFunction() override; }; -// Rewrites composite variable op `tf.AssignAddVariableOp` or -// `tf.AssignSubVariableOp` into primitive resource/computation ops. -// For example: -// -// tf.AssignAddVariableOp(%res, %0) -// -// Becomes -// -// %res_val = tf.ReadVariableOp(%res) -// %1 = tf.AddV2(%res_val, %0) -// tf.AssignVariableOp(%res, %1) -// -template -LogicalResult RewriteCompositeAssignVariableOp(T src_op, OpBuilder* builder) { - // Read mangled dtype, which indicates type of data stored in resource - // variable. It can then be used to construct type needed for both - // ReadVariableOp and AssignVariableOp. - StringAttr mangled_dtype_attr = - src_op.template getAttrOfType(kDTypeAttr); - std::string type_string = mangled_dtype_attr.getValue(); - tensorflow::DataType dtype_proto; - auto s = - tensorflow::mangling_util::DemangleDataType(type_string, &dtype_proto); - if (!s.ok()) return src_op.emitError() << s.error_message(); - - Type type; - s = tensorflow::ConvertDataType(dtype_proto, *builder, &type); - if (!s.ok()) return src_op.emitError() << s.error_message(); - type = UnrankedTensorType::get(type); - - builder->setInsertionPoint(src_op); - - auto read_variable_op = builder->create( - src_op.getLoc(), type, src_op.resource()); - read_variable_op.setAttr(builder->getIdentifier(kDTypeAttr), - mangled_dtype_attr); - - Value* result; - if (std::is_same()) { - result = builder->create( - src_op.getLoc(), read_variable_op.value(), src_op.value()); - } else { - result = builder->create( - src_op.getLoc(), read_variable_op.value(), src_op.value()); - } - - auto assign_variable_op = builder->create( - src_op.getLoc(), src_op.resource(), result); - assign_variable_op.setAttr(builder->getIdentifier(kDTypeAttr), - mangled_dtype_attr); - - src_op.erase(); - return success(); -} - -// Rewrites `tf.ResourceApplyGradientDescent` into primitive resource and -// computation ops. 
-// -// Specifically: -// -// tf.ResourceApplyGradientDescent(%var, %alpha, %delta) -// -// Becomes -// -// %old_var_val = tf.ReadVariableOp(%var) -// %gradient_update = tf.Mul(%alpha, %delta) -// %new_var_val = tf.Sub(%old_var_val, %gradient_update) -// tf.AssignVariableOp(%var, %new_var_val) -LogicalResult RewriteResourceApplyGradientDescentOp( - TF::ResourceApplyGradientDescentOp op, OpBuilder* builder) { - Type type = op.alpha()->getType(); - auto t = UnrankedTensorType::get(type.cast().getElementType()); - - tensorflow::DataType data_type; - auto s = tensorflow::ConvertToDataType(type, &data_type); - if (!s.ok()) return op.emitError() << s.error_message(); - - std::string mangled_data_type = - tensorflow::mangling_util::MangleDataType(data_type); - auto mangled_dtype_attr = builder->getStringAttr(mangled_data_type); - - builder->setInsertionPoint(op); - auto read_variable_op = - builder->create(op.getLoc(), t, op.var()); - read_variable_op.setAttr(builder->getIdentifier(kDTypeAttr), - mangled_dtype_attr); - - auto mul_op = - builder->create(op.getLoc(), t, op.alpha(), op.delta()); - auto sub_op = builder->create( - op.getLoc(), t, read_variable_op.value(), mul_op.z()); - auto assign_variable_op = - builder->create(op.getLoc(), op.var(), sub_op.z()); - assign_variable_op.setAttr(builder->getIdentifier(kDTypeAttr), - mangled_dtype_attr); - - op.erase(); - - return success(); -} - -// Rewrites an operation that updates value of a resource variable into its -// equivalent primitive ones so that following analysis/rewrite can be easier. -// If given op is not a composite resource store op or is an unsupported op, no -// change is applied. -// TODO(ycao): Explore using pattern rewriter after needed operations are -// defined. -// TODO(ycao): Add support for other composite resource store ops. -LogicalResult MaybeRewriteCompositeResourceStore(Operation* op, - OpBuilder* builder) { - if (auto assign_add_op = dyn_cast(op)) { - return RewriteCompositeAssignVariableOp(assign_add_op, builder); - } else if (auto assign_sub_op = dyn_cast(op)) { - return RewriteCompositeAssignVariableOp(assign_sub_op, builder); - } else if (auto resource_apply_gradient_descent_op = - dyn_cast(op)) { - return RewriteResourceApplyGradientDescentOp( - resource_apply_gradient_descent_op, builder); - } - - return success(); -} - // Performs store-load forwarding. This effectively removes // 1) Any resource loads after a store to that same resource is done // 2) Any resource stores except the last one. @@ -358,10 +235,6 @@ void HoistResourceOpsFromLaunchOp(tf_device::LaunchOp launch_op) { ModuleOp m = launch_op.getParentOfType(); OpBuilder builder(m); - // Rewrite composite resource store operations into primitive ones. - launch_op.walk( - [&](Operation* op) { MaybeRewriteCompositeResourceStore(op, &builder); }); - // Perform store-load forwarding. So that each resource is only loaded with // its initial value and is only stored with its final value. ForwardStoreToLoad(launch_op); diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc index c44c81d1cef..812100ced64 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc @@ -47,6 +47,46 @@ using ::tensorflow::int64; namespace mlir { namespace TF { +namespace { +Optional> InferShapeForFunctionReturnType( + FuncOp func) { + // Only infer shape when there is one return op for now. 
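+  // Sketch of the intended refinement (illustrative types): if the block ends
+  // with
+  //
+  //   %0 = "tf.Cast"(%static) : (tensor<4xf32>) -> tensor<*xf32>
+  //   return %0 : tensor<*xf32>
+  //
+  // the cast only relaxes the shape, so the return is rewired to use %static
+  // directly and the inferred return type becomes tensor<4xf32>.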
+ if (!has_single_element(func.getBody()) || func.front().empty()) { + return None; + } + + // Find the return type. + auto return_op = dyn_cast(func.front().back()); + if (!return_op) { + return None; + } + + // Manually fold tf.Cast that precedes the return instruction and only differs + // in shape refinement level. + for (OpOperand& arg_op : return_op.getOperation()->getOpOperands()) { + if (auto cast_op = dyn_cast(arg_op.get()->getDefiningOp())) { + // Shape inference should not change the element type. + if (cast_op.SrcT() != cast_op.DstT()) continue; + // We only refine the result shape if the result a dynamic shape, the + // input has static shape, and the two shapes are compatible. + auto has_static_shape = [](const Value* value) { + auto shaped_type = value->getType().dyn_cast(); + return shaped_type && shaped_type.hasStaticShape(); + }; + Value* input = cast_op.x(); + Value* result = cast_op.y(); + if (!has_static_shape(input) || has_static_shape(result) || + failed(verifyCompatibleShape(input->getType(), result->getType()))) + continue; + + arg_op.set(cast_op.x()); + if (cast_op.y()->use_empty()) cast_op.erase(); + } + } + + return llvm::to_vector<4>(return_op.getOperandTypes()); +} +} // namespace bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, int64_t graph_version) { @@ -245,11 +285,10 @@ LogicalResult InferShapeUntilFixPoint(Region* region, int64_t graph_version, return success(); } -LogicalResult InferShapeForFunction(FuncOp op, +LogicalResult InferShapeForFunction(FuncOp func, ArrayRef> arg_shapes, int64_t graph_version) { - auto main_func = op; - mlir::FunctionType func_type = main_func.getType(); + mlir::FunctionType func_type = func.getType(); bool needs_refinement = false; llvm::SmallVector new_arg_types; new_arg_types.reserve(func_type.getNumInputs()); @@ -276,7 +315,7 @@ LogicalResult InferShapeForFunction(FuncOp op, auto new_arg_type = mlir::RankedTensorType::get(shape, element_type); if (new_arg_type != func_type.getInput(i)) { // If the new type is more detailed, trigger shape inference. - main_func.getArgument(i)->setType(new_arg_type); + func.getArgument(i)->setType(new_arg_type); needs_refinement = true; } new_arg_types.push_back(new_arg_type); @@ -287,39 +326,28 @@ LogicalResult InferShapeForFunction(FuncOp op, } mlir::LogicalResult result = - mlir::TF::InferShapeUntilFixPoint(&main_func.getBody(), graph_version); + mlir::TF::InferShapeUntilFixPoint(&func.getBody(), graph_version); if (failed(result)) { return failure(); } - // Must only have 1 block so that there is only one return op. - if (main_func.getBody().getBlocks().size() != 1 || - main_func.front().empty()) { - return failure(); + auto return_types = InferShapeForFunctionReturnType(func); + func.setType(mlir::FunctionType::get(new_arg_types, + return_types.hasValue() + ? return_types.getValue() + : func.getType().getResults(), + func.getContext())); + + return success(); +} + +LogicalResult InferShapeForFunctionType(FuncOp func) { + if (auto return_types = InferShapeForFunctionReturnType(func)) { + func.setType(mlir::FunctionType::get(func.getType().getInputs(), + return_types.getValue(), + func.getContext())); } - // Find the return type. - auto return_op = dyn_cast(*main_func.front().rbegin()); - if (!return_op) { - return failure(); - } - - // Manually fold tf.Cast that precedes the return instruction and only differ - // in shape refinement level. 
- for (OpOperand& arg_op : return_op.getOperation()->getOpOperands()) { - if (auto cast_op = dyn_cast(arg_op.get()->getDefiningOp())) { - if (cast_op.SrcT() != cast_op.DstT()) continue; - arg_op.set(cast_op.x()); - if (cast_op.y()->use_empty()) cast_op.erase(); - } - } - - llvm::SmallVector return_types(return_op.getOperandTypes()); - - // Update function signature with the results of inference. - main_func.setType( - mlir::FunctionType::get(new_arg_types, return_types, op.getContext())); - return success(); } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.h b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.h index 1cbd5eb6c29..0529e6414b7 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.h +++ b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.h @@ -41,12 +41,16 @@ bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect, LogicalResult InferShapeUntilFixPoint(Region* region, int64_t graph_version, int64_t max_iteration = 10); -// Given a list of refined shapes matching the function arguments of op, run +// Given a list of refined shapes matching the function arguments of func, runs // shape inference over the function to propagate this updated information. -LogicalResult InferShapeForFunction(FuncOp op, +LogicalResult InferShapeForFunction(FuncOp func, ArrayRef> arg_shapes, int64_t graph_version); +// Refines the return type of the given function by folding tf.Cast that +// precedes the return instruction. +LogicalResult InferShapeForFunctionType(FuncOp func); + } // namespace TF } // namespace mlir diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference_pass.cc index 2ef601e914d..637b14346b0 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference_pass.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference_pass.cc @@ -65,7 +65,11 @@ struct ShapeInference : public ModulePass { return; } for (auto func : module.getOps()) { - TF::InferShapeUntilFixPoint(&func.getBody(), producer.getInt()); + InferShapeUntilFixPoint(&func.getBody(), producer.getInt()); + } + + if (auto main_func = module.lookupSymbol("main")) { + InferShapeForFunctionType(main_func); } } }; diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_cluster_formation.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_cluster_formation.cc index 6580ad53129..3fb311ff415 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_cluster_formation.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_cluster_formation.cc @@ -34,10 +34,12 @@ limitations under the License. 
#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/Support/Casting.h" #include "mlir/IR/Attributes.h" // TF:local_config_mlir #include "mlir/IR/Builders.h" // TF:local_config_mlir #include "mlir/IR/Identifier.h" // TF:local_config_mlir #include "mlir/IR/MLIRContext.h" // TF:local_config_mlir +#include "mlir/IR/Operation.h" // TF:local_config_mlir #include "mlir/IR/Types.h" // TF:local_config_mlir #include "mlir/IR/Value.h" // TF:local_config_mlir #include "mlir/Pass/Pass.h" // TF:local_config_mlir @@ -57,8 +59,6 @@ constexpr char kTPUReplicateAttr[] = "_tpu_replicate"; constexpr char kDeviceAttr[] = "device"; constexpr char kNameAttr[] = "name"; constexpr char kNumReplicasAttr[] = "num_replicas"; -constexpr char kTPUReplicatedInputOp[] = "tf.TPUReplicatedInput"; -constexpr char kTPUReplicatedOutputOp[] = "tf.TPUReplicatedOutput"; constexpr char kBadTPUReplicateAttrMsg[] = "requires '_tpu_replicate' string attribute"; @@ -275,9 +275,8 @@ LogicalResult ReplicateCluster(tf_device::LaunchOp launch_op, mlir::visitUsedValuesDefinedAbove( launch_op.body(), launch_op.body(), [&](mlir::OpOperand* operand) { Operation* def = operand->get()->getDefiningOp(); - if (def && def->getName().getStringRef() == kTPUReplicatedInputOp) { + if (def && llvm::isa(def)) replicated_input_ops.insert(def); - } }); // Check if number of operands of each used TPUReplicatedInput op matches @@ -305,10 +304,10 @@ LogicalResult ReplicateCluster(tf_device::LaunchOp launch_op, int idx = result_and_idx.index(); for (auto& use : result->getUses()) { Operation* def = use.getOwner(); - if (!def || def->getName().getStringRef() != kTPUReplicatedOutputOp) + if (!def || !llvm::isa(def)) return launch_op.emitError() << "requires output of " << launch_op.getOperationName() - << " to lead to a '" << kTPUReplicatedOutputOp << "' op"; + << " to lead to a 'tf.TPUReplicatedOutput' op"; if (def->getNumResults() != num_replicas) return def->emitOpError() << "requires " << num_replicas << " results"; @@ -331,9 +330,8 @@ LogicalResult ReplicateCluster(tf_device::LaunchOp launch_op, // Create terminator for replicate op and move launch into replicate. builder.setInsertionPointToEnd(&replicate_op.GetBody()); - auto return_op = builder.create( - replicate_op.getLoc(), - llvm::SmallVector(launch_op.getResults())); + auto return_op = builder.create(replicate_op.getLoc(), + launch_op.getResults()); launch_op.getOperation()->moveBefore(return_op); return success(); @@ -427,8 +425,8 @@ void TPUClusterFormation::runOnFunction() { // Remove TPUReplicatedInput and TPUReplicatedOutput nodes. auto remove_result = getFunction().walk([&](Operation* op) { - auto op_name = op->getName().getStringRef(); - if (op_name != kTPUReplicatedInputOp && op_name != kTPUReplicatedOutputOp) + if (!llvm::isa(op) && + !llvm::isa(op)) return WalkResult::advance(); // Forward operand to result. When `num_replicas` attribute is 1, no @@ -440,7 +438,8 @@ void TPUClusterFormation::runOnFunction() { // Leftover TPUReplicatedInput/TPUReplicatedOutput that are not of // `num_replicas` to 1. 
if (!op->use_empty()) { - op->emitOpError() << "expects " << op_name << " to have no uses"; + op->emitOpError() << "expects " << op->getName().getStringRef() + << " to have no uses"; return WalkResult::interrupt(); } diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc index c5bf918a496..1033670dd1c 100644 --- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc +++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc @@ -109,13 +109,13 @@ LogicalResult EncapsulateFuncAndSerialize(FuncOp entry_func, return parent_module.emitError(CreateMissingAttributeMsg(kVersionsAttr)); module_for_func.get().getOperation()->setAttr(kVersionsAttr, versions_attr); - ModuleManager module_manager(module_for_func.get()); + SymbolTable symbol_table(module_for_func.get()); while (!referenced.empty()) { auto func = referenced.pop_back_val(); // Skip functions that have already been cloned into new module. - if (module_manager.lookupSymbol(func.getName())) continue; + if (symbol_table.lookup(func.getName())) continue; // Find any SymbolRefAttr in func that maps to a FuncOp. We need to clone // all found FuncOps to new_module to make sure new_module is @@ -138,7 +138,7 @@ LogicalResult EncapsulateFuncAndSerialize(FuncOp entry_func, // should be no other reference to it. clone.setName("main"); } - module_manager.insert(clone); + symbol_table.insert(clone); } // Serialize module and return. diff --git a/tensorflow/compiler/mlir/tensorflow/translate/breakup-islands.cc b/tensorflow/compiler/mlir/tensorflow/translate/breakup-islands.cc index 22d04b27dd1..764c7915577 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/breakup-islands.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/breakup-islands.cc @@ -13,14 +13,19 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include + #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir #include "mlir/IR/Builders.h" // TF:local_config_mlir #include "mlir/IR/Operation.h" // TF:local_config_mlir #include "mlir/Pass/Pass.h" // TF:local_config_mlir #include "mlir/Pass/PassRegistry.h" // TF:local_config_mlir #include "mlir/Support/STLExtras.h" // TF:local_config_mlir +#include "tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h" // This pass is used in preparation for Graph export. @@ -38,12 +43,11 @@ struct BreakUpIslands : OperationPass { void runOnOperation() final; void BreakUpIsland(tf_executor::IslandOp op, + const TF::SideEffectAnalysis& side_effect_analysis, llvm::DenseMap>* new_control_edges); }; -} // end anonymous namespace - void BreakUpIslands::runOnOperation() { auto graph_op_range = getOperation().getBody().front().without_terminator(); tf_executor::GraphOp graph_op; @@ -61,12 +65,13 @@ void BreakUpIslands::runOnOperation() { // Map from the users of the existing islands to the list of control // edges that need to be added. llvm::DenseMap> new_control_edges; + auto& side_effect_analysis = getAnalysis(); // Iterate in reverse order to avoid invalidating Operation* stored in // new_control_edges. 
for (auto& item : llvm::make_early_inc_range(llvm::reverse(graph_op.GetBody()))) { if (auto island = dyn_cast(&item)) { - BreakUpIsland(island, &new_control_edges); + BreakUpIsland(island, side_effect_analysis, &new_control_edges); } } OpBuilder builder(getOperation()); @@ -106,21 +111,81 @@ void BreakUpIslands::runOnOperation() { } } +// Helper that creates an island. If `sub_op` is not nullptr, it will be moved +// to the island. +tf_executor::IslandOp CreateIsland(ArrayRef result_types, + ArrayRef control_inputs, + const tf_executor::ControlType& control_type, + const Location& loc, Operation* sub_op, + tf_executor::IslandOp original_island) { + OpBuilder builder(original_island); + auto island = builder.create( + loc, result_types, control_type, control_inputs); + island.body().push_back(new Block); + Block* block = &island.body().back(); + if (sub_op) { + sub_op->replaceAllUsesWith(island.outputs()); + sub_op->moveBefore(block, block->begin()); + } + OpBuilder island_builder(original_island); + island_builder.setInsertionPointToEnd(block); + if (sub_op) { + island_builder.create(loc, sub_op->getResults()); + } else { + island_builder.create(loc, ArrayRef{}); + } + return island; +} + +// A struct containing the operations in an island that do not have incoming or +// outgoing dependencies. +struct IslandSourcesAndSinks { + // Sub-ops that do not depend on other ops in the island. + llvm::SmallPtrSet sources; + // Sub-ops that do not have other sub-ops in the island depending on them + // (excluding yield). + llvm::SmallPtrSet sinks; +}; + +// Finds IslandSourcesAndSinks for an unmodified island. +IslandSourcesAndSinks FindSourcesAndSinksInIsland( + tf_executor::IslandOp island, + const TF::SideEffectAnalysis& side_effect_analysis) { + IslandSourcesAndSinks result; + auto island_body = island.GetBody().without_terminator(); + for (Operation& sub_op : island_body) { + auto predecessors = side_effect_analysis.DirectControlPredecessors(&sub_op); + result.sinks.insert(&sub_op); + // Remove predecessor from sinks. + for (auto predecessor : predecessors) result.sinks.erase(predecessor); + bool has_in_island_operands = false; + for (auto operand : sub_op.getOperands()) { + auto defining_op = operand->getDefiningOp(); + if (!defining_op || defining_op->getParentOp() != island) continue; + // Remove operands from sinks. + result.sinks.erase(defining_op); + has_in_island_operands = true; + } + if (predecessors.empty() && !has_in_island_operands) { + result.sources.insert(&sub_op); + } + } + return result; +} + // Converts a single island into multiple islands (one for each op). The islands // are chained together by control flow values. void BreakUpIslands::BreakUpIsland( tf_executor::IslandOp op, + const TF::SideEffectAnalysis& side_effect_analysis, llvm::DenseMap>* new_control_edges) { auto island_body = op.GetBody().without_terminator(); // Skip islands that are already only a single op. // Skip islands that are empty (only yield). if (island_body.empty() || has_single_element(island_body)) return; - OpBuilder builder(op); - OpBuilder island_builder(op); auto control_type = tf_executor::ControlType::get(&getContext()); - Value* previous_island = nullptr; - auto tmp_control_inputs = llvm::to_vector<4>(op.controlInputs()); + auto island_control_inputs = llvm::to_vector<4>(op.controlInputs()); // Add control dependencies for yields of values defined by other islands to // the island that defines that fetched value.
for (auto* fetch : op.GetYield().fetches()) { @@ -130,7 +195,7 @@ void BreakUpIslands::BreakUpIsland( // OK, because it is the same island. } else if (auto island_op = llvm::dyn_cast( fetch->getDefiningOp())) { - tmp_control_inputs.push_back(island_op.control()); + island_control_inputs.push_back(island_op.control()); } else { // TODO(parkers): Any defining op that has a control output can be handled // just like an island. @@ -138,39 +203,71 @@ void BreakUpIslands::BreakUpIsland( return signalPassFailure(); } } - ArrayRef previous_control = tmp_control_inputs; + // If there are multiple control inputs, create an empty island to group them. + if (island_control_inputs.size() > 1) { + auto island = CreateIsland({}, island_control_inputs, control_type, + op.getLoc(), nullptr, op); + island_control_inputs.clear(); + island_control_inputs.push_back(island.control()); + } + // Find sources and sinks inside the original island. + auto sources_and_sinks = + FindSourcesAndSinksInIsland(op, side_effect_analysis); + // The corresponding control output of the new island created for each sub-op. + llvm::SmallDenseMap new_control_for_sub_ops; + // Control outputs of newly created islands that are sinks. + llvm::SmallVector sink_island_controls; // For each operation in the island, construct a new island to wrap the op, // yield all the results, and replace all the usages with the results of the // new island. - for (Operation& sub_op : llvm::make_early_inc_range(island_body)) { - auto loc = sub_op.getLoc(); - auto island = builder.create( - loc, llvm::to_vector<4>(sub_op.getResultTypes()), control_type, - previous_control); - island.body().push_back(new Block); - Block* block = &island.body().back(); - sub_op.replaceAllUsesWith(island.outputs()); - block->getOperations().splice(block->begin(), op.GetBody().getOperations(), - sub_op); - island_builder.setInsertionPointToEnd(block); - island_builder.create( - loc, llvm::to_vector<4>(sub_op.getResults())); - previous_island = island.control(); - previous_control = previous_island; + for (auto& sub_op : llvm::make_early_inc_range(island_body)) { + const auto predecessors = + side_effect_analysis.DirectControlPredecessors(&sub_op); + // Get the controls from the predecessors. + llvm::SmallVector predecessors_control; + predecessors_control.reserve(predecessors.size()); + for (auto predecessor : predecessors) { + predecessors_control.push_back(new_control_for_sub_ops[predecessor]); + } + // If sub_op is a source, use island_control_inputs, because that's required + // by inter-islands dependencies; otherwise, we do not need to include + // island_control_inputs, since they must have been tracked by the (direct + // or indirect) control predecessors or operands. + ArrayRef control = sources_and_sinks.sources.count(&sub_op) > 0 + ? island_control_inputs + : predecessors_control; + auto island = + CreateIsland(llvm::to_vector<4>(sub_op.getResultTypes()), control, + control_type, sub_op.getLoc(), &sub_op, op); + new_control_for_sub_ops[&sub_op] = island.control(); + if (sources_and_sinks.sinks.count(&sub_op)) { + sink_island_controls.push_back(island.control()); + } } - op.control()->replaceAllUsesWith(previous_island); - // All existing outputs need to add a control flow edge to the - // previous_island. + // Create output controls for the sinks. + assert(!sink_island_controls.empty()); + // If there are multiple output controls, create an empty island to group + // them. 
+ if (sink_island_controls.size() > 1) { + auto island = CreateIsland({}, sink_island_controls, control_type, + op.getLoc(), nullptr, op); + sink_island_controls.clear(); + sink_island_controls.push_back(island.control()); + } + assert(sink_island_controls.size() == 1); + op.control()->replaceAllUsesWith(sink_island_controls[0]); + // All existing outputs need to add a control flow edge from + // sink_island_controls[0]. for (Value* out : op.outputs()) { for (auto& use : out->getUses()) { Operation* owner = use.getOwner(); if (auto island_op = llvm::dyn_cast(owner->getParentOp())) { - (*new_control_edges)[island_op].push_back(previous_island); + (*new_control_edges)[island_op].push_back(sink_island_controls[0]); } else if (llvm::isa(owner) || llvm::isa(owner) || llvm::isa(owner)) { - (*new_control_edges)[owner].push_back(previous_island); + (*new_control_edges)[owner].push_back(sink_island_controls[0]); } else { use.getOwner()->emitError("Adding control dependency not supported"); return signalPassFailure(); @@ -182,6 +279,8 @@ void BreakUpIslands::BreakUpIsland( op.erase(); } +} // namespace + std::unique_ptr> CreateBreakUpIslandsPass() { return std::make_unique(); } diff --git a/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc b/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc index bac3ea22973..58242e62f1c 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/export_graphdef.cc @@ -535,6 +535,18 @@ StatusOr> Exporter::Convert( arg, index, graph_as_function && !input_names.empty() ? input_names[index] : "")); } + + auto convert_called_function = [&](llvm::StringRef name) { + auto func = + function.getParentOfType().lookupSymbol( + name); + if (func != nullptr) { + TF_RETURN_IF_ERROR(ConvertLibFunction(configs, tf_dialect, func, flib)); + TF_RETURN_IF_ERROR(graph->AddFunctionLibrary(*flib)); + } + return Status::OK(); + }; + // Adds nodes for operations. for (Operation& inst : block) { auto op_name = GetTensorFlowOpName(inst.getName().getStringRef()); @@ -544,13 +556,12 @@ StatusOr> Exporter::Convert( // definition library // TODO(prakalps): If two functions have cyclic dependence, this will // introduce an infinite loop. - auto func = - function.getParentOfType().lookupSymbol( - op_name.ValueOrDie()); - if (func != nullptr) { - TF_RETURN_IF_ERROR(ConvertLibFunction(configs, tf_dialect, func, flib)); - TF_RETURN_IF_ERROR(graph->AddFunctionLibrary(*flib)); - } + TF_RETURN_IF_ERROR(convert_called_function(op_name.ValueOrDie().str())); + } + + if (IsLegacyCallInstruction(&inst)) { + TF_RETURN_IF_ERROR(convert_called_function( + inst.getAttrOfType("f").getLeafReference())); } for (auto type : inst.getResultTypes()) { diff --git a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc index da2e6a67445..7bc7c914f56 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc @@ -16,8 +16,11 @@ limitations under the License. 
#include "tensorflow/compiler/mlir/tensorflow/translate/import_model.h" #include +#include #include #include +#include +#include #include "absl/algorithm/container.h" #include "absl/container/flat_hash_map.h" @@ -97,6 +100,9 @@ using stream_executor::port::StatusOr; namespace { +const char* disable_call_shape_inference_attribute_name = + "_disable_call_shape_inference"; + // This class is used to generate new MLIR function name strings that are both // unique in the TF function library `flib_` and unique among the name strings // generated by the class object during its lifetime. @@ -246,11 +252,14 @@ class ImporterBase { llvm::SmallVector* attributes); // Helper to create either a tf_executor operation or a TF operation wrapped - // in an island. + // in an island. When convert_to_legacy_call is true, converts the operation + // representing a call to a library function with a name represented in + // node_type_name to LegacyCallOp. mlir::Operation* createOperation( - const Node& node, llvm::StringRef op_name, + const Node& node, llvm::StringRef node_type_name, const mlir::OperationState& result, - const llvm::SmallVectorImpl& control_operands); + const llvm::SmallVectorImpl& control_operands, + bool convert_to_legacy_call = false); // Converts one NodeDef from the input GraphDef into an Operation and // inserts it into the MLIR module using builder_. @@ -297,19 +306,24 @@ class ImporterBase { // Gets the location information string for the given node. std::string GetLocationStr(const Node& node, bool includeNodeName = false); - // Inserts a placeholder node in the graph to replace the input node. Replaces - // all the output edges of the input_node with the placeholder node, and - // removes the input_node from the graph. The new node has the same name as - // the input_node, so Nodespecs do not need any modification. + // Inserts a placeholder node in the graph to replace a feed output tensor, + // and returns the new placeholder node and a boolean indicating if the + // original input node was removed from the graph. Uses of the feed output + // tensor are replaced with this placeholder node. If the feed output tensor + // is of a single output node, the control dependencies are forwarded to + // the placeholder node, and the original node will be removed. // Note: This modifies the graph, and so any list of ordered nodes needs to be // reconstructed. - StatusOr ReplaceWithPlaceholderNode(const TensorShapeProto& shape, - DataType dtype, Node* input_node); + StatusOr> CreatePlaceholderNodeForFeed( + const TensorShapeProto& shape, DataType dtype, Node* node, int index, + const std::unordered_map& node_name_map); // Gets the input and output nodes corresponding to the specified input and // output nodes in specs_. If there are no input or output nodes specified, - // nodes will be empty - Status GetInputOutputNodes(std::unordered_set* nodes); + // nodes will be empty. + Status GetInputOutputNodes( + const std::unordered_map& node_name_map, + std::unordered_set* nodes); // The input graph with backedges removed. The removed backedges are stored // in the back_edge_helper. @@ -339,6 +353,10 @@ class ImporterBase { NodeValueMap node_values_; std::unique_ptr shape_refiner_; NameUniquifier* function_name_uniquifier_; + + protected: + // Maps feed as TensorId to new Placeholder node name.
+ absl::flat_hash_map remapped_feeds_; }; // Returns true if the node with given name has a non primary output that is @@ -419,6 +437,49 @@ Status PreprocessGraphDef(const GraphImportConfig* specs, GraphDef* graph_def) { return Status::OK(); } +// Mapping from node name to feed (index and ArrayInfo). Node name must outlive +// this map. +using FeedsByNode = absl::flat_hash_map< + absl::string_view, + absl::flat_hash_map*>>; + +// Creates from a `GraphImportConfig::InputArrays` a mapping from a feeds output +// tensor name to index and ArrayInfo. Keys and values are backed by +// `GraphImportConfig::InputArrays`. +StatusOr GetFeedsByNode( + const GraphImportConfig::InputArrays& inputs) { + FeedsByNode feeds_by_node; + feeds_by_node.reserve(inputs.size()); + + for (const auto& input : inputs) { + TensorId tensor = ParseTensorName(input.first); + if (tensor.index() < 0) + return errors::FailedPrecondition( + "Feed output tensor must be a data output '", tensor.ToString(), "'"); + + auto& node = feeds_by_node[tensor.node()]; + if (!node.insert({tensor.index(), &input}).second) + return errors::FailedPrecondition( + "Multiple feeds for the same output tensor '", tensor.ToString(), + "'"); + } + + return feeds_by_node; +} + +// Creates a unique name for a node that will be replacing a feed output tensor. +std::string GetUniqueNodeName( + absl::string_view node_name, int index, + const std::unordered_map& node_name_map) { + std::string new_node_name_base = absl::StrCat(node_name, "_", index); + int count = 0; + std::string new_node_name = new_node_name_base; + while (node_name_map.find(new_node_name) != node_name_map.end()) { + new_node_name = absl::StrCat(new_node_name_base, "_", count++); + } + return new_node_name; +} + Status ImporterBase::RemoveBackedges(const Graph& graph) { // TODO(fengliuai): Converting to GraphDef and back is the easiest way to // clone a graph. @@ -459,37 +520,54 @@ Status ImporterBase::RemoveBackedges(const Graph& graph) { return Status::OK(); } -StatusOr ImporterBase::ReplaceWithPlaceholderNode( - const TensorShapeProto& shape, DataType dtype, Node* input_node) { +StatusOr> ImporterBase::CreatePlaceholderNodeForFeed( + const TensorShapeProto& shape, DataType dtype, Node* node, int index, + const std::unordered_map& node_name_map) { + DCHECK_LT(index, node->num_outputs()); + const bool update_inplace = node->num_outputs() == 1 && index == 0; + std::string new_node_name = + update_inplace ? node->name() + : GetUniqueNodeName(node->name(), index, node_name_map); + Node* placeholder_node; - NodeBuilder builder(input_node->name(), "Placeholder"); + NodeBuilder builder(new_node_name, "Placeholder"); builder.Attr("shape", shape); builder.Attr("dtype", dtype); TF_RETURN_IF_ERROR(builder.Finalize(graph_.get(), &placeholder_node)); - while (!input_node->out_edges().empty()) { - const Edge* oe = *input_node->out_edges().begin(); - // UpdateEdge cannot be used with control edges. - if (oe->src_output() == Graph::kControlSlot) { - graph_->AddControlEdge(placeholder_node, oe->dst()); - graph_->RemoveControlEdge(oe); - continue; + // Update edges from original feed with Placeholder node. 
+ std::vector data_edges; + std::vector control_edges; + for (const tensorflow::Edge* edge : node->out_edges()) { + if (edge->src_output() == index) { + data_edges.push_back(edge); + } else if (update_inplace && edge->IsControlEdge()) { + control_edges.push_back(edge); } - - TF_RETURN_IF_ERROR( - graph_->UpdateEdge(placeholder_node, 0, oe->dst(), oe->dst_input())); } - graph_->RemoveNode(input_node); + for (const auto* edge : data_edges) { + TF_RETURN_IF_ERROR(graph_->UpdateEdge(placeholder_node, 0, edge->dst(), + edge->dst_input())); + } - return placeholder_node; + for (const auto* edge : control_edges) { + graph_->AddControlEdge(placeholder_node, edge->dst()); + graph_->RemoveControlEdge(edge); + } + + if (update_inplace) { + graph_->RemoveNode(node); + } + + return std::pair(placeholder_node, update_inplace); } Status ImporterBase::GetInputOutputNodes( + const std::unordered_map& node_name_map, std::unordered_set* nodes) { - auto node_name_map = graph_->BuildNodeNameIndex(); - auto add_node = [&](const string& name) { - auto it = node_name_map.find(name); + auto add_node = [&](absl::string_view name) { + auto it = node_name_map.find(std::string(name)); if (it == node_name_map.end()) { return errors::FailedPrecondition( absl::StrCat("Graph does not contain node: ", name)); @@ -498,13 +576,25 @@ Status ImporterBase::GetInputOutputNodes( return Status::OK(); }; + // Remap feeds and fetches to newly created Placeholder nodes. for (const auto& input : specs_.inputs) { - TF_RETURN_IF_ERROR(add_node(input.first)); + TensorId tensor = ParseTensorName(input.first); + auto remapped_it = remapped_feeds_.find(tensor); + if (remapped_it != remapped_feeds_.end()) { + TF_RETURN_IF_ERROR(add_node(remapped_it->second)); + } else { + TF_RETURN_IF_ERROR(add_node(tensor.node())); + } } for (const auto& output : specs_.outputs) { - auto output_node_name = std::string(ParseTensorName(output).first); - TF_RETURN_IF_ERROR(add_node(output_node_name)); + TensorId tensor = ParseTensorName(output); + auto remapped_it = remapped_feeds_.find(tensor); + if (remapped_it != remapped_feeds_.end()) { + TF_RETURN_IF_ERROR(add_node(remapped_it->second)); + } else { + TF_RETURN_IF_ERROR(add_node(tensor.node())); + } } return Status::OK(); @@ -520,6 +610,9 @@ Status ImporterBase::AddNodesToShapeRefiner() { shape_refiner_->set_require_shape_inference_fns(false); shape_refiner_->set_function_library_for_shape_inference(&graph_flib_); + TF_ASSIGN_OR_RETURN(auto feeds_by_node, GetFeedsByNode(specs_.inputs)); + auto node_name_map = graph_->BuildNodeNameIndex(); + // First add all nodes to the refiner. for (Node* node : ordered_nodes_) { // We need to use a TensorFlow node to teach the shape refiner that user @@ -533,28 +626,49 @@ Status ImporterBase::AddNodesToShapeRefiner() { // it to replace the original input node, so the shape refiner can // successfully propagate the user's input type and shape to the rest of the // graph. - auto it = specs_.inputs.find(node->name()); - if (it != specs_.inputs.end()) { - auto node_name = node->op_def().name(); - if (node_name != "Placeholder" && node_name != "LegacyFedInput" && - node_name != FunctionLibraryDefinition::kArgOp) { - // We do not handle the case where the input node has multiple outputs - if (node->num_outputs() > 1) { - return errors::FailedPrecondition(absl::StrCat( - "Input arrays can only have op with single output. 
Node op:", - node_name)); + bool node_added_to_shape_refiner = false; + auto it = feeds_by_node.find(node->name()); + if (it != feeds_by_node.end()) { + auto op_name = node->op_def().name(); + if (op_name != "Placeholder" && op_name != "LegacyFedInput" && + op_name != FunctionLibraryDefinition::kArgOp) { + for (const auto& output_tensor : it->second) { + const int index = output_tensor.first; + const ArrayInfo& array_info = output_tensor.second->second; + + DataType dtype = array_info.imported_dtype; + // Uses the existing output type if it isn't specified by the user. + if (dtype == DT_INVALID) { + dtype = node->output_type(0); + } + + TF_ASSIGN_OR_RETURN( + auto placeholder_node_and_removed, + CreatePlaceholderNodeForFeed(array_info.shape, dtype, node, index, + node_name_map)); + + Node* placeholder_node = placeholder_node_and_removed.first; + if (placeholder_node_and_removed.second) { + // Original node has been removed from the graph. + node = placeholder_node; + node_added_to_shape_refiner = true; + } + remapped_feeds_[{it->first, index}] = placeholder_node->name(); + node_name_map[placeholder_node->name()] = placeholder_node; + // Add the new placeholder node to the shape refiner. + TF_RETURN_WITH_CONTEXT_IF_ERROR( + shape_refiner_->AddNode(placeholder_node), + GetLocationStr(*placeholder_node)); } - // For single output nodes, replace them with Placeholder node. - DataType dtype = it->second.imported_dtype; - // Uses the existing output type if it isn't specified by the user. - if (dtype == DT_INVALID) { - dtype = node->output_type(0); - } - TF_ASSIGN_OR_RETURN( - node, ReplaceWithPlaceholderNode(it->second.shape, dtype, node)); } else { - node->AddAttr("shape", it->second.shape); - DataType dtype = it->second.imported_dtype; + auto index_it = it->second.find(0); + if (index_it == it->second.end()) { + return errors::FailedPrecondition( + "Missing feed output tensor at index 0 for node '", node->name(), + "'"); + } + node->AddAttr("shape", index_it->second->second.shape); + DataType dtype = index_it->second->second.imported_dtype; // Uses the existing output type if it isn't specified by the user. if (dtype == DT_INVALID) { dtype = node->output_type(0); @@ -562,9 +676,11 @@ Status ImporterBase::AddNodesToShapeRefiner() { node->AddAttr("dtype", dtype); } } - // Adds the node to the shape refiner. - TF_RETURN_WITH_CONTEXT_IF_ERROR(shape_refiner_->AddNode(node), - GetLocationStr(*node)); + if (!node_added_to_shape_refiner) { + // Add the node to the shape refiner if the node hasn't been removed. + TF_RETURN_WITH_CONTEXT_IF_ERROR(shape_refiner_->AddNode(node), + GetLocationStr(*node)); + } auto set_shape_from_list_attr = [&](const AttrValue* attr) { auto& list = attr->list(); @@ -625,7 +741,7 @@ Status ImporterBase::AddNodesToShapeRefiner() { // Prune nodes in the graph that are not reachable from the output. 
if (specs_.prune_unused_nodes) { std::unordered_set prune_start; - TF_RETURN_IF_ERROR(GetInputOutputNodes(&prune_start)); + TF_RETURN_IF_ERROR(GetInputOutputNodes(node_name_map, &prune_start)); if (!prune_start.empty()) { if (PruneForReverseReachability(graph_.get(), prune_start)) { VLOG(1) << "Pruned unused nodes in graphdef"; @@ -829,9 +945,11 @@ StatusOr ImporterBase::ConvertAttributeValue( return builder_.getFloatAttr(builder_.getF32Type(), value.f()); case AttrValue::kB: return builder_.getBoolAttr(value.b()); - case AttrValue::kType: - return builder_.getStringAttr( - mangling_util::MangleDataType(value.type())); + case AttrValue::kType: { + mlir::Type type; + TF_RETURN_IF_ERROR(ConvertDataType(value.type(), builder_, &type)); + return mlir::TypeAttr::get(type); + } case AttrValue::kShape: return builder_.getStringAttr(mangling_util::MangleShape(value.shape())); case AttrValue::kTensor: @@ -1106,11 +1224,9 @@ Status ImporterBase::ConvertFunctionArgAndRets( builder_.setInsertionPointToEnd(&graph_op.body().front()); builder_.create(graph_op.getLoc(), inst_to_return); - inst_to_return.assign(graph_op.getResults().begin(), - graph_op.getResults().end()); builder_.setInsertionPointToEnd(bb); builder_.create(mlir::UnknownLoc::get(context_), - inst_to_return); + graph_op.getResults()); return Status::OK(); } @@ -1210,9 +1326,10 @@ std::string ImporterBase::GetLocationStr(const Node& node, } mlir::Operation* ImporterBase::createOperation( - const Node& node, llvm::StringRef op_name, + const Node& node, llvm::StringRef node_type_name, const mlir::OperationState& result, - const llvm::SmallVectorImpl& control_operands) { + const llvm::SmallVectorImpl& control_operands, + bool convert_to_legacy_call) { // For the tf.executor specific operations (not wrapped in an island), we // have an extra returned value for the control result, and we concatenate // control and non-control operands. @@ -1274,11 +1391,31 @@ mlir::Operation* ImporterBase::createOperation( mlir::OpBuilder island_builder(&island.GetBody()); // Create the operation inside the island now. - mlir::Operation* inner_op = island_builder.createOperation(result); + mlir::Operation* inner_op; + if (convert_to_legacy_call) { + bool disable_call_shape_inference = false; + for (const auto& name_and_value : node.attrs()) { + const auto& attr_name = name_and_value.first; + const AttrValue& attr_value = name_and_value.second; + if (strcmp(attr_name.c_str(), + disable_call_shape_inference_attribute_name) == 0 && + attr_value.value_case() == AttrValue::kB) { + disable_call_shape_inference = attr_value.b(); + } + } + + mlir::BoolAttr attribute = + builder_.getBoolAttr(disable_call_shape_inference); + inner_op = island_builder.create( + result.location, result.types, result.operands, + island_builder.getSymbolRefAttr(node_type_name), attribute); + } else { + inner_op = island_builder.createOperation(result); + } // Add the terminator for the island - mlir::SmallVector ret_vals(inner_op->getResults()); - island_builder.create(result.location, ret_vals); + island_builder.create(result.location, + inner_op->getResults()); return island.getOperation(); } @@ -1293,9 +1430,11 @@ Status ImporterBase::ConvertNode(const Node& node) { // create the MLIR function and insert it to the module if it doesn't exist. 
std::string node_type_name = node.type_string(); const auto* func_def = graph_flib_.Find(node_type_name); + bool convert_to_legacy_call = false; if (func_def) { TF_RETURN_IF_ERROR(ConvertLibFunction(node_type_name)); node_type_name = (*tf_name_to_mlir_name_)[node_type_name]; + convert_to_legacy_call = true; } auto get_full_op_name = [&](const std::string& op_name) { @@ -1380,6 +1519,14 @@ Status ImporterBase::ConvertNode(const Node& node) { for (const auto& name_and_value : node.attrs()) { const auto& attr_name = name_and_value.first; const AttrValue& attr_value = name_and_value.second; + // LegacyCall can only represent the _disable_call_shape_inference attribute. + // If a call has other attributes, it can't be converted to LegacyCall. + if (convert_to_legacy_call && + (strcmp(attr_name.c_str(), + disable_call_shape_inference_attribute_name) || + attr_value.value_case() != AttrValue::kB)) { + convert_to_legacy_call = false; + } if (attr_value.value_case() == AttrValue::kFunc) { // Attribute iteration order is not defined for protocol buffer Map. // Process function attributes separately in the lexicographical order to @@ -1423,9 +1570,8 @@ Status ImporterBase::ConvertNode(const Node& node) { } // Register the mapping between the TF node and the newly created operation. - node_values_[node.id()] = - createOperation(node, op_name, result, control_operands); - + node_values_[node.id()] = createOperation( + node, node_type_name, result, control_operands, convert_to_legacy_call); return Status::OK(); } @@ -1667,36 +1813,52 @@ StatusOr GraphDefImporter::InferMainFunctionType( const GraphImportConfig& specs, mlir::MLIRContext* context, absl::InlinedVector* arg_nodes, absl::InlinedVector* ret_nodes) { - // Finds out all the input nodes and output nodes. - absl::flat_hash_set output_node_names; - for (const auto& output_tensor : specs.outputs) { - output_node_names.insert(ParseTensorName(output_tensor).node()); + // Find all the input nodes and output nodes. + // Feeds have been remapped to single output nodes (Placeholder), so an exact + // name match is sufficient. + absl::flat_hash_map inputs; + for (auto input_and_idx : llvm::enumerate(specs.inputs)) { + TensorId tensor = ParseTensorName(input_and_idx.value().first); + auto remapped_it = remapped_feeds_.find(tensor); + if (remapped_it != remapped_feeds_.end()) { + inputs.insert({remapped_it->second, input_and_idx.index()}); + } else { + inputs.insert({tensor.node(), input_and_idx.index()}); + } } - if (!specs.inputs.empty() || !specs.outputs.empty()) { - arg_nodes->resize(specs.inputs.size()); - ret_nodes->resize(specs.outputs.size()); + + absl::flat_hash_set output_node_names; + std::vector outputs; + output_node_names.reserve(specs.outputs.size()); + for (const auto& output : specs.outputs) { + TensorId tensor = ParseTensorName(output); + auto remapped_it = remapped_feeds_.find(tensor); + if (remapped_it != remapped_feeds_.end()) { + output_node_names.insert(remapped_it->second); + outputs.push_back({remapped_it->second, 0}); + } else { + output_node_names.insert(tensor.node()); + outputs.push_back(tensor); + } + } + + if (!inputs.empty() || !outputs.empty()) { + arg_nodes->resize(inputs.size()); + ret_nodes->resize(outputs.size()); + for (Node* n : GetOrderedNodes()) { // Handle inputs/arguments.
- auto input_it = specs.inputs.find(n->name()); - if (input_it != specs.inputs.end()) { - (*arg_nodes)[std::distance(specs.inputs.begin(), input_it)] = {n, 0}; + auto input_it = inputs.find(n->name()); + if (input_it != inputs.end()) { + (*arg_nodes)[input_it->second] = {n, 0}; } // Handle outputs/returns. if (output_node_names.contains(n->name())) { - for (int i = 0, e = specs.outputs.size(); i != e; ++i) { - std::pair name_and_port = - absl::StrSplit(specs.outputs[i], ':'); - auto name = name_and_port.first; - if (name != n->name()) continue; - int port = 0; - if (!name_and_port.second.empty() && - !absl::SimpleAtoi(name_and_port.second, &port)) { - return errors::InvalidArgument("Invalid port specification: ", - specs.outputs[i]); - } - (*ret_nodes)[i] = {n, port}; + for (int i = 0, e = outputs.size(); i != e; ++i) { + TensorId tensor = outputs[i]; + if (n->name() != tensor.node()) continue; + (*ret_nodes)[i] = {n, tensor.index()}; } } } @@ -2118,7 +2280,11 @@ class StructuredValueLinearizer { // Returns the list of index paths to each leaf of the StructuredValue, // in a linearized order matching `tf.nest.flatten`. - llvm::ArrayRef GetLeafIndexPaths() const; + // + // If an error occurred during the linearization process, an error message with + // `error_context` prepended will be included in the returned status. + StatusOr> GetLeafIndexPaths( + llvm::StringRef error_context) const; private: // Main function that recursively traverses the StructuredValue. @@ -2130,6 +2296,8 @@ class StructuredValueLinearizer { llvm::SmallVector current_index_path_; // The list of leaf index paths we have discovered so far. llvm::SmallVector leaf_index_paths_; + // If non-empty, an error message to report. + std::string error_message_; }; StructuredValueLinearizer::StructuredValueLinearizer( @@ -2138,9 +2306,19 @@ StructuredValueLinearizer::StructuredValueLinearizer( RecursivelyFindLeaves(value); } -llvm::ArrayRef StructuredValueLinearizer::GetLeafIndexPaths() - const { - return leaf_index_paths_; +StatusOr> +StructuredValueLinearizer::GetLeafIndexPaths( + llvm::StringRef error_context) const { + if (error_message_.empty()) { + return llvm::makeArrayRef(leaf_index_paths_); + } + return errors::InvalidArgument( + error_context.str(), error_message_, + "This likely means that you have @tf.function " + "on an exported function instead of " + "@tf.function(input_signature=[...]). Consider annotating an " + "input_signature or narrowing your set of " + "exported names to not include this function."); } void StructuredValueLinearizer::RecursivelyFindLeaves( @@ -2196,7 +2374,20 @@ void StructuredValueLinearizer::RecursivelyFindLeaves( return; } default: { - llvm_unreachable("Unhandled StructuredValue kind!"); + llvm::raw_string_ostream os(error_message_); + // TODO(silvasean): Use an enumerant name string instead of a number.
+ os << "Unhandled structured value kind " << value.kind_case() + << " at index path: "; + for (auto path_element : current_index_path_) { + os << "."; + if (auto integer = path_element.dyn_cast()) { + os << integer.getValue(); + } else { + auto str = path_element.cast(); + os << str.getValue(); + } + } + os << "\n"; } } } @@ -2290,6 +2481,9 @@ Status CreateSavedModelIR( if (object_names.GetExportedNames(node_id).empty()) { continue; } + std::string error_context = + "While importing SavedModel function '" + + object_names.GetExportedNames(node_id)[0].str() + "': "; const SavedFunction& function = object.function(); auto orig_func = symbol_table.lookup( tf_name_to_mlir_name.find(function.concrete_functions(0))->second); @@ -2314,8 +2508,7 @@ Status CreateSavedModelIR( /*config=*/builder.getStringAttr(""), /*config_proto=*/builder.getStringAttr(""), /*executor_type=*/builder.getStringAttr("")); - body_builder.create( - func.getLoc(), llvm::to_vector<4>(call.getResults())); + body_builder.create(func.getLoc(), call.getResults()); } func.setAttr( "tf_saved_model.exported_names", @@ -2338,9 +2531,12 @@ Status CreateSavedModelIR( int bound_input_base = func.getNumArguments() - concrete_function.bound_inputs_size(); - auto input_index_paths = input_linearizer.GetLeafIndexPaths(); + TF_ASSIGN_OR_RETURN(auto input_index_paths, + input_linearizer.GetLeafIndexPaths( + error_context + "in input signature: ")); if (bound_input_base != input_index_paths.size()) { return errors::InvalidArgument( + error_context, "Argument mismatch between concrete function input signature " "vs underlying FunctionDef for concrete function '", function.concrete_functions(0), "' (", input_index_paths.size(), @@ -2361,9 +2557,12 @@ Status CreateSavedModelIR( StructuredValueLinearizer output_linearizer( concrete_function.output_signature(), builder.getContext()); - auto output_index_paths = output_linearizer.GetLeafIndexPaths(); + TF_ASSIGN_OR_RETURN(auto output_index_paths, + output_linearizer.GetLeafIndexPaths( + error_context + "in output signature: ")); if (func.getNumResults() != output_index_paths.size()) { return errors::InvalidArgument( + error_context, "Result mismatch between concrete function output signature " "vs underlying FunctionDef for concrete function '", function.concrete_functions(0), "' (", output_index_paths.size(), diff --git a/tensorflow/compiler/mlir/tensorflow/translate/tf_functional_to_executor.cc b/tensorflow/compiler/mlir/tensorflow/translate/tf_functional_to_executor.cc index ff397e4b456..86fbff91db1 100644 --- a/tensorflow/compiler/mlir/tensorflow/translate/tf_functional_to_executor.cc +++ b/tensorflow/compiler/mlir/tensorflow/translate/tf_functional_to_executor.cc @@ -67,8 +67,6 @@ void FunctionalToExecutorDialectConversion::runOnFunction() { LLVM_DEBUG(llvm::dbgs() << "Expect function to end with return\n"); return; } - llvm::SmallVector args = - llvm::to_vector<4>(return_op.getOperands()); // Build GraphOp. OpBuilder builder(&body, body.begin()); auto graph_op = builder.create( @@ -79,10 +77,10 @@ void FunctionalToExecutorDialectConversion::runOnFunction() { loc, getFunction().getType().getResults(), tf_executor::ControlType::get(&getContext()), ArrayRef()); // Create Fetch. - auto to_fetch = llvm::to_vector<4>(island.getResults()); + ValueRange to_fetch = island.getResults(); if (to_fetch.size() != 1) { // Drop control result for fetch. - to_fetch.pop_back(); + to_fetch = to_fetch.drop_back(); } builder.create(loc, to_fetch); // Build Island. 
@@ -91,7 +89,7 @@ void FunctionalToExecutorDialectConversion::runOnFunction() { island.body().front().begin(), body.getOperations(), copy_range.begin(), copy_range.end()); builder.setInsertionPointToEnd(&island.body().front()); - builder.create(loc, args); + builder.create(loc, return_op.getOperands()); for (auto item : llvm::enumerate(graph_op.getResults())) { return_op.setOperand(item.index(), item.value()); } diff --git a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc index a5839cf7645..dc9ec6aa8ea 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc @@ -15,7 +15,7 @@ limitations under the License. #include "tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h" -#include "absl/types/span.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir #include "mlir/IR/Function.h" // TF:local_config_mlir @@ -58,7 +58,7 @@ Status ParseMlirModule(llvm::StringRef mlir_module_string, // Converts arg_shapes to xla::Shape's and store into xla_input_shapes. Status GetXlaInputShapes( - mlir::ModuleOp module, absl::Span arg_shapes, + mlir::ModuleOp module, llvm::ArrayRef arg_shapes, const xla::CustomShapeRepresentationFn shape_representation_fn, std::vector* xla_input_shapes) { xla_input_shapes->clear(); @@ -150,7 +150,8 @@ void GetInputMappingForMlir(int num_inputs, std::vector* input_mapping) { } // Refine MLIR types based on new shape information. -Status RefineShapes(absl::Span arg_shapes, mlir::ModuleOp module) { +Status RefineShapes(llvm::ArrayRef arg_shapes, + mlir::ModuleOp module) { auto versions = module.getAttrOfType<::mlir::DictionaryAttr>("tf.versions"); if (!versions) { return errors::Internal( @@ -234,7 +235,7 @@ Status ConvertMLIRToXlaComputation(mlir::ModuleOp module_op, } Status CompileSerializedMlirToXlaHlo( - llvm::StringRef mlir_module_string, absl::Span arg_shapes, + llvm::StringRef mlir_module_string, llvm::ArrayRef arg_shapes, const XlaCompiler::ShapeRepresentationFn shape_representation_fn, XlaCompiler::CompilationResult* compilation_result) { mlir::MLIRContext mlir_context; diff --git a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h index 635c1d67f82..a07927ce432 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.h @@ -16,7 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_UTILS_COMPILE_MLIR_UTIL_H_ #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_UTILS_COMPILE_MLIR_UTIL_H_ -#include "absl/types/span.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "mlir/IR/Module.h" // TF:local_config_mlir #include "tensorflow/compiler/tf2xla/xla_compiler.h" @@ -40,7 +40,7 @@ Status ConvertMLIRToXlaComputation(mlir::ModuleOp module_op, // Compiles a serialized MLIR module into XLA HLO, generates all accompanying // metadata and stores them in CompilationResult. 
Status CompileSerializedMlirToXlaHlo( - llvm::StringRef mlir_module_string, absl::Span arg_shapes, + llvm::StringRef mlir_module_string, llvm::ArrayRef arg_shapes, const XlaCompiler::ShapeRepresentationFn shape_representation_fn, XlaCompiler::CompilationResult* compilation_result); } // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util_test.cc b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util_test.cc index 3574b336f9a..1668cf615f0 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util_test.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util_test.cc @@ -41,9 +41,9 @@ TEST(CompileSerializedMlirToXlaHloTest, InvalidSerializedMlirModule) { std::vector arg_shapes; XlaCompiler::CompilationResult compilation_result; - Status s = CompileSerializedMlirToXlaHlo( - invalid_mlir_module, absl::Span(arg_shapes), - TestShapeRepresentation, &compilation_result); + Status s = CompileSerializedMlirToXlaHlo(invalid_mlir_module, arg_shapes, + TestShapeRepresentation, + &compilation_result); EXPECT_EQ(s.code(), tensorflow::errors::Code::INVALID_ARGUMENT); } @@ -61,8 +61,7 @@ TEST(CompileSerializedMlirToXlaHloTest, Success) { XlaCompiler::CompilationResult compilation_result; Status s = CompileSerializedMlirToXlaHlo( - mlir_module, absl::Span(arg_shapes), TestShapeRepresentation, - &compilation_result); + mlir_module, arg_shapes, TestShapeRepresentation, &compilation_result); ASSERT_TRUE(s.ok()); const xla::HloModuleConfig module_config( @@ -134,8 +133,7 @@ TEST(CompileSerializedMlirToXlaHloTest, CompileTimeConstantFoldedSuccess) { XlaCompiler::CompilationResult compilation_result; Status s = CompileSerializedMlirToXlaHlo( - mlir_module, absl::Span(arg_shapes), TestShapeRepresentation, - &compilation_result); + mlir_module, arg_shapes, TestShapeRepresentation, &compilation_result); ASSERT_TRUE(s.ok()); const xla::HloModuleConfig module_config( @@ -174,8 +172,7 @@ TEST(CompileSerializedMlirToXlaHloTest, ShapeInference) { XlaCompiler::CompilationResult compilation_result; Status s = CompileSerializedMlirToXlaHlo( - mlir_module, absl::Span(arg_shapes), TestShapeRepresentation, - &compilation_result); + mlir_module, arg_shapes, TestShapeRepresentation, &compilation_result); TF_ASSERT_OK(s); const xla::HloModuleConfig module_config( diff --git a/tensorflow/compiler/mlir/tensorflow/utils/error_util.h b/tensorflow/compiler/mlir/tensorflow/utils/error_util.h index 198d04e0486..a60d90cbfb7 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/error_util.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/error_util.h @@ -22,13 +22,11 @@ limitations under the License. #include "mlir/IR/Location.h" // TF:local_config_mlir #include "mlir/IR/MLIRContext.h" // TF:local_config_mlir #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/stream_executor/lib/statusor.h" // Error utilities for MLIR when interacting with code using Status returns. namespace mlir { // TensorFlow's Status is used for error reporting back to callers. -using stream_executor::port::StatusOr; using tensorflow::Status; // Diagnostic handler that collects all the diagnostics reported and can produce diff --git a/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc b/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc index 69b309f0632..e35b7130de8 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc +++ b/tensorflow/compiler/mlir/tensorflow/utils/export_utils.cc @@ -34,6 +34,7 @@ limitations under the License. 
#include "mlir/IR/StandardTypes.h" // TF:local_config_mlir #include "mlir/IR/TypeUtilities.h" // TF:local_config_mlir #include "mlir/Support/DebugStringHelper.h" // TF:local_config_mlir +#include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h" #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h" #include "tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.h" #include "tensorflow/compiler/mlir/tensorflow/utils/convert_type.h" @@ -253,21 +254,30 @@ StatusOr> GetOperationNodeDef( // Note: we do not use NodeBuilder or NodeDefBuilder as that would require // mapping back from the inputs to the input arguments. - // Some control flow ops in TensorFlow Graph have their respective "Ref" ops - // as well. For example there is Enter and RefEnter op. RefEnter forwards - // the input ref buffer to output. However both Enter and RefEnter are - // mapped to tf_executor::EnterOp during import and then to _tf.Enter op in - // control dialect. Check if it is a Ref op to correctly map to the TensorFlow - // Graph op. llvm::SmallString<64> op_name; - if (IsRefTypeControlOp(inst)) op_name = "Ref"; - - TF_ASSIGN_OR_RETURN(auto tf_name, - GetTensorFlowOpName(inst->getName().getStringRef())); - op_name.append(tf_name); + if (IsLegacyCallInstruction(inst)) { + // The op_name is the name of the function. + op_name.append( + inst->getAttrOfType("f").getLeafReference()); + // Remove the attribute from the instruction as it is already converted to + // op_name. + auto attr_id = mlir::Identifier::get("f", inst->getContext()); + inst->removeAttr(attr_id); + } else { + // Some control flow ops in TensorFlow Graph have their respective "Ref" ops + // as well. For example there is Enter and RefEnter op. RefEnter forwards + // the input ref buffer to output. However both Enter and RefEnter are + // mapped to tf_executor::EnterOp during import and then to _tf.Enter op in + // control dialect. Check if it is a Ref op to correctly map to the + // TensorFlow Graph op. + if (IsRefTypeControlOp(inst)) op_name = "Ref"; + TF_ASSIGN_OR_RETURN(auto tf_name, + GetTensorFlowOpName(inst->getName().getStringRef())); + op_name.append(tf_name); + } + node_def->set_name(name.str()); node_def->set_op(op_name.str()); - node_def->set_name(name); // Add inputs to the NodeDef based on the number of operands. This is required // as later when edges are added to the Node using Graph::AddEdge the @@ -454,4 +464,9 @@ Status SetSizeAttribute(absl::string_view name, size_t size, return Status::OK(); } +bool IsLegacyCallInstruction(mlir::Operation* inst) { + return llvm::dyn_cast(inst) || + inst->getName().getStringRef().compare("_tf.LegacyCall") == 0; +} + } // namespace tensorflow diff --git a/tensorflow/compiler/mlir/tensorflow/utils/export_utils.h b/tensorflow/compiler/mlir/tensorflow/utils/export_utils.h index 8d813b53bd8..df176762c07 100644 --- a/tensorflow/compiler/mlir/tensorflow/utils/export_utils.h +++ b/tensorflow/compiler/mlir/tensorflow/utils/export_utils.h @@ -73,5 +73,16 @@ Status SetShapeAttribute(absl::string_view name, mlir::ShapedType shape, // If the attribute already exists with a different value, returns an error. Status SetSizeAttribute(absl::string_view name, size_t size, AttrValueMap* values); + +// Returns true if the given instruction is an mlir::TF::LegacyCallOp or the +// result of such an operation transformed by the +// ExecutorToControlDialectConversion pass. 
+// +// TODO(b/145706023): When the ExecutorToControlDialectConversion pass runs +// before the exporter, it mutates an mlir::TF::LegacyCallOp instruction to +// an instruction with a different operation name. As such, this routine checks +// both forms of a LegacyCall instruction. We only need to check for +// mlir::TF::LegacyCallOp when the ticket is resolved. +bool IsLegacyCallInstruction(mlir::Operation* inst); } // namespace tensorflow #endif // TENSORFLOW_COMPILER_MLIR_TENSORFLOW_UTILS_EXPORTER_UTILS_H_ diff --git a/tensorflow/compiler/mlir/xla/BUILD b/tensorflow/compiler/mlir/xla/BUILD index ac3475cebc4..bf71bcda776 100644 --- a/tensorflow/compiler/mlir/xla/BUILD +++ b/tensorflow/compiler/mlir/xla/BUILD @@ -23,6 +23,8 @@ package_group( ], ) +exports_files(["ir/hlo_ops.td"]) + filegroup( name = "hlo_ops_td_files", srcs = [ @@ -406,6 +408,7 @@ cc_library( "//tensorflow/compiler/xla/client:xla_builder", "//tensorflow/compiler/xla/client/lib:matrix", "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/stream_executor/lib", "@llvm//:support", "@local_config_mlir//:Analysis", "@local_config_mlir//:IR", diff --git a/tensorflow/compiler/mlir/xla/hlo_function_importer.cc b/tensorflow/compiler/mlir/xla/hlo_function_importer.cc index 7c95a13285b..1da4fd04ffb 100644 --- a/tensorflow/compiler/mlir/xla/hlo_function_importer.cc +++ b/tensorflow/compiler/mlir/xla/hlo_function_importer.cc @@ -182,13 +182,12 @@ tensorflow::Status HloFunctionImporter::ImportInstructions( // Setup the return type (HLO only supports a single return value). TF_ASSIGN_OR_RETURN(auto result, GetMlirValue(computation->root_instruction())); - llvm::SmallVector return_values({result}); // Create terminator op depending on the parent op of this region. if (llvm::isa(block->getParentOp())) { - builder.create(loc, makeArrayRef(return_values)); + builder.create(loc, result); } else { - builder.create(loc, makeArrayRef(return_values)); + builder.create(loc, result); } return tensorflow::Status::OK(); } @@ -266,32 +265,20 @@ StatusOr HloFunctionImporter::ImportInstruction( MakeAndReturn(CompareOp); } case HloOpcode::kGather: { - const auto& gather_dimensions = instruction->gather_dimension_numbers(); - std::vector offset_dims(gather_dimensions.offset_dims().begin(), - gather_dimensions.offset_dims().end()); + auto gather_instruction = static_cast(instruction); + attributes.push_back(ConvertGatherDimensionNumbers( + gather_instruction->gather_dimension_numbers())); std::vector slice_sizes( - instruction->gather_slice_sizes().begin(), - instruction->gather_slice_sizes().end()); + gather_instruction->gather_slice_sizes().begin(), + gather_instruction->gather_slice_sizes().end()); + attributes.push_back( + builder_->getNamedAttr("slice_sizes", Convert(slice_sizes))); + attributes.push_back(builder_->getNamedAttr( + "indices_are_sorted", + builder_->getBoolAttr(gather_instruction->indices_are_sorted()))); - std::vector collapsed_slice_dims( - gather_dimensions.collapsed_slice_dims().begin(), - gather_dimensions.collapsed_slice_dims().end()); - - std::vector start_index_map( - gather_dimensions.start_index_map().begin(), - gather_dimensions.start_index_map().end()); - - // TODO(b/132057942): Change to explicitly passing an integer instead of - // call getI64IntegerAttr here. 
- return func_builder - ->create( - loc, result_type, operands[0], operands[1], - func_builder->getI64IntegerAttr( - gather_dimensions.index_vector_dim()), - Convert(offset_dims), Convert(slice_sizes), - Convert(collapsed_slice_dims), Convert(start_index_map)) - .getOperation(); + MakeAndReturn(GatherOp); } case HloOpcode::kDynamicUpdateSlice: { return func_builder @@ -707,4 +694,19 @@ mlir::NamedAttribute HloFunctionImporter::ConvertConvDimensionNumbers( return builder_->getNamedAttr("dimension_numbers", attr); } +mlir::NamedAttribute HloFunctionImporter::ConvertGatherDimensionNumbers( + const xla::GatherDimensionNumbers& dnums) { + std::vector offset_dims(dnums.offset_dims().begin(), + dnums.offset_dims().end()); + std::vector collapsed_slice_dims( + dnums.collapsed_slice_dims().begin(), dnums.collapsed_slice_dims().end()); + std::vector start_index_map(dnums.start_index_map().begin(), + dnums.start_index_map().end()); + auto attr = mlir::xla_hlo::GatherDimensionNumbers::get( + Convert(offset_dims), Convert(collapsed_slice_dims), + Convert(start_index_map), + builder_->getI64IntegerAttr(dnums.index_vector_dim()), context_); + return builder_->getNamedAttr("dimension_numbers", attr); +} + } // namespace xla diff --git a/tensorflow/compiler/mlir/xla/hlo_function_importer.h b/tensorflow/compiler/mlir/xla/hlo_function_importer.h index 11a6b1c7dd5..bd36c9b2b54 100644 --- a/tensorflow/compiler/mlir/xla/hlo_function_importer.h +++ b/tensorflow/compiler/mlir/xla/hlo_function_importer.h @@ -117,6 +117,10 @@ class HloFunctionImporter { mlir::NamedAttribute ConvertConvDimensionNumbers( const xla::ConvolutionDimensionNumbers& dnums); + // Converts the gather dimensions to attributes. + mlir::NamedAttribute ConvertGatherDimensionNumbers( + const xla::GatherDimensionNumbers& dnums); + mlir::MLIRContext* context_; mlir::ModuleOp module_; mlir::Builder* builder_; diff --git a/tensorflow/compiler/mlir/xla/ir/hlo_ops.cc b/tensorflow/compiler/mlir/xla/ir/hlo_ops.cc index b2f02bdf76f..08967372bcb 100644 --- a/tensorflow/compiler/mlir/xla/ir/hlo_ops.cc +++ b/tensorflow/compiler/mlir/xla/ir/hlo_ops.cc @@ -606,7 +606,7 @@ static TensorType GetReduceResultType(Type operand_ty, } void ReduceOp::build(Builder* builder, OperationState& state, - ArrayRef operands, ArrayRef init_values, + ValueRange operands, ValueRange init_values, DenseIntElementsAttr dimensions) { SmallVector result_ty; result_ty.reserve(operands.size()); @@ -845,9 +845,8 @@ Type SliceOp::InferOutputTypes(Builder* builder, Value* operand, // SortOp //===----------------------------------------------------------------------===// -void SortOp::build(Builder* builder, OperationState& state, - ArrayRef operands, int64_t dimension, - bool is_stable) { +void SortOp::build(Builder* builder, OperationState& state, ValueRange operands, + int64_t dimension, bool is_stable) { state.addOperands(operands); state.addAttribute("dimension", builder->getI64IntegerAttr(dimension)); state.addAttribute("is_stable", builder->getBoolAttr(dimension)); @@ -990,7 +989,7 @@ void GetTupleElementOp::build(Builder* builder, OperationState& result, //===----------------------------------------------------------------------===// void TupleOp::build(Builder* builder, OperationState& result, - ArrayRef values) { + ValueRange values) { SmallVector types; types.reserve(values.size()); for (auto val : values) { diff --git a/tensorflow/compiler/mlir/xla/ir/hlo_ops.td b/tensorflow/compiler/mlir/xla/ir/hlo_ops.td index c9b3e7985fc..3c4fd473eb6 100644 --- 
a/tensorflow/compiler/mlir/xla/ir/hlo_ops.td +++ b/tensorflow/compiler/mlir/xla/ir/hlo_ops.td @@ -405,8 +405,8 @@ def HLO_ReduceOp: HLO_Op<"reduce", [ let results = (outs Variadic); let builders = [OpBuilder< - "Builder *, OperationState &state, ArrayRef operands, " - "ArrayRef init_values, DenseIntElementsAttr dimensions" + "Builder *, OperationState &state, ValueRange operands, " + "ValueRange init_values, DenseIntElementsAttr dimensions" >]; let hasFolder = 1; @@ -445,7 +445,7 @@ def HLO_TupleOp : HLO_Op<"tuple", [NoSideEffect]>, BASE_HLO_TupleOp { let builders = [OpBuilder< "Builder *builder, OperationState &results, " - "ArrayRef values">]; + "ValueRange values">]; // TupleOp has special conversion logic to HLO. let hasCustomHLOConverter = 1; @@ -777,21 +777,25 @@ def HLO_FftOp: HLO_Op<"fft", [NoSideEffect]>, BASE_HLO_FftOp { let hasCustomHLOConverter = 1; } +def GatherDimensionNumbers : StructAttr<"GatherDimensionNumbers", HLO_Dialect, + [StructFieldAttr<"offset_dims", I64ElementsAttr>, + StructFieldAttr<"collapsed_slice_dims", I64ElementsAttr>, + StructFieldAttr<"start_index_map", I64ElementsAttr>, + StructFieldAttr<"index_vector_dim", I64Attr>]> { + let description = "Structure of dimension information for gather"; +} + def HLO_GatherOp: HLO_Op<"gather", [NoSideEffect]>, BASE_HLO_GatherOp { let arguments = (ins HLO_Tensor:$operand, HLO_IntTensor:$start_indices, - I64Attr:$index_vector_dim, - I64ElementsAttr:$offset_dims, + GatherDimensionNumbers:$dimension_numbers, I64ElementsAttr:$slice_sizes, - I64ElementsAttr:$collapsed_slice_dims, - I64ElementsAttr:$start_index_map + DefaultValuedAttr:$indices_are_sorted ); let results = (outs HLO_Tensor); - // TODO(b/129422361) Attributes are not supported by the codegen. The - // optional argument (dimensions) needs to be added as an attribute. let hasCustomHLOConverter = 1; } @@ -880,7 +884,7 @@ def HLO_SortOp : HLO_Op<"sort", [NoSideEffect]>, BASE_HLO_SortOp { let regions = (region SizedRegion<1>:$comparator); let builders = [OpBuilder< - "Builder *builder, OperationState &state, ArrayRef operands, " + "Builder *builder, OperationState &state, ValueRange operands, " "int64_t dimension, bool is_stable" >]; diff --git a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc index e9bf3bac44b..26cd512aa85 100644 --- a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc +++ b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc @@ -40,7 +40,9 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/stream_executor/lib/statusor.h" +using ::stream_executor::port::StatusOr; using ::tensorflow::int16; using ::tensorflow::int32; using ::tensorflow::int64; @@ -149,6 +151,7 @@ I64_ELEMENTS_ATTR_TO_VECTOR(permutation); I64_ELEMENTS_ATTR_TO_VECTOR(start_indices); I64_ELEMENTS_ATTR_TO_VECTOR(limit_indices); I64_ELEMENTS_ATTR_TO_VECTOR(strides); +I64_ELEMENTS_ATTR_TO_VECTOR(slice_sizes); #undef I64_ELEMENTS_ATTR_TO_VECTOR @@ -267,6 +270,30 @@ static xla::ComparisonDirection Convert_comparison_direction( .ValueOrDie(); } +static xla::GatherDimensionNumbers Convert_gather_dimension_numbers( + mlir::xla_hlo::GatherDimensionNumbers input) { + xla::GatherDimensionNumbers output; + + auto offset_dims = ConvertDenseIntAttr(input.offset_dims()); + std::copy(offset_dims.begin(), offset_dims.end(), + tensorflow::protobuf::RepeatedFieldBackInserter( + output.mutable_offset_dims())); + + auto collapsed_slice_dims = ConvertDenseIntAttr(input.collapsed_slice_dims()); + std::copy(collapsed_slice_dims.begin(), collapsed_slice_dims.end(), + tensorflow::protobuf::RepeatedFieldBackInserter( + output.mutable_collapsed_slice_dims())); + + auto start_index_map = ConvertDenseIntAttr(input.start_index_map()); + std::copy(start_index_map.begin(), start_index_map.end(), + tensorflow::protobuf::RepeatedFieldBackInserter( + output.mutable_start_index_map())); + + output.set_index_vector_dim( + ConvertAPInt(input.index_vector_dim().getValue())); + return output; +} + static xla::ScatterDimensionNumbers Convert_scatter_dimension_numbers( mlir::xla_hlo::ScatterDimensionNumbers input) { xla::ScatterDimensionNumbers output; @@ -496,7 +523,13 @@ LogicalResult ExportXlaOp(DynamicUpdateSliceOp op, OpLoweringContext ctx) { LogicalResult ExportXlaOp(FftOp op, OpLoweringContext ctx) { return failure(); } LogicalResult ExportXlaOp(GatherOp op, OpLoweringContext ctx) { - return failure(); + auto& value_map = *ctx.values; + xla::GatherDimensionNumbers dimension_numbers = + Convert_gather_dimension_numbers(op.dimension_numbers()); + value_map[op] = xla::Gather( + value_map[op.operand()], value_map[op.start_indices()], dimension_numbers, + Convert_slice_sizes(op.slice_sizes()), op.indices_are_sorted()); + return success(); } LogicalResult ExportXlaOp(IotaOp op, OpLoweringContext ctx) { diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir index 8aa9b5ef101..c95b2c86960 100644 --- a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir +++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir @@ -25,18 +25,25 @@ func @fusedBatchNorm_training(%arg0: tensor<8x8x8x8xf32>, %arg1: tensor<8xf32>, // CHECK-LABEL: func @biasAdd_NHWC func @biasAdd_NHWC(%arg0: tensor<1x32x10x32xi32>, %arg1: tensor<32xi32>) -> tensor<1x32x10x32xi32> { - // CHECK-NEXT: %0 = "xla_hlo.add"(%arg0, %arg1) {broadcast_dimensions = dense<3> : tensor<1xi64>} + // CHECK: "xla_hlo.add"(%arg0, %arg1) {broadcast_dimensions = dense<3> : tensor<1xi64>} %0 = "tf.BiasAdd"(%arg0, %arg1) {T = "tfdtype$DT_FLOAT", data_format = "NHWC"} : (tensor<1x32x10x32xi32>, tensor<32xi32>) -> tensor<1x32x10x32xi32> return %0 : tensor<1x32x10x32xi32> } // CHECK-LABEL: func @biasAdd_NCHW func @biasAdd_NCHW(%arg0: tensor<1x32x10x32xi32>, %arg1: tensor<32xi32>) -> tensor<1x32x10x32xi32> { - // CHECK-NEXT: %0 = "xla_hlo.add"(%arg0, %arg1) {broadcast_dimensions = 
dense<1> : tensor<1xi64>} + // CHECK: "xla_hlo.add"(%arg0, %arg1) {broadcast_dimensions = dense<1> : tensor<1xi64>} %0 = "tf.BiasAdd"(%arg0, %arg1) {T = "tfdtype$DT_FLOAT", data_format = "NCHW"} : (tensor<1x32x10x32xi32>, tensor<32xi32>) -> tensor<1x32x10x32xi32> return %0 : tensor<1x32x10x32xi32> } +// CHECK-LABEL: func @biasAdd_dynamic +func @biasAdd_dynamic(%arg0: tensor, %arg1: tensor) -> tensor { + // CHECK: "xla_hlo.add"(%arg0, %arg1) {broadcast_dimensions = dense<1> : tensor<1xi64>} + %0 = "tf.BiasAdd"(%arg0, %arg1) {data_format = "NCHW"} : (tensor, tensor) -> tensor + return %0 : tensor +} + //===----------------------------------------------------------------------===// // Binary op legalizations. //===----------------------------------------------------------------------===// @@ -666,11 +673,18 @@ func @preventgradient(%arg0: tensor<1xi32>) -> tensor<1xi32> { // CHECK-LABEL: @const func @const() -> tensor<2xi32> { - // CHECK-NEXT: xla_hlo.constant dense<0> : tensor<2xi32> + // CHECK: xla_hlo.constant dense<0> : tensor<2xi32> %0 = "tf.Const"() {device = "", name = "", dtype = "tfdtype$DT_INT32", value = dense<0> : tensor<2xi32>} : () -> (tensor<2xi32>) return %0: tensor<2xi32> } +// CHECK-LABEL: @const_dynamic_output +func @const_dynamic_output() -> tensor<*xi32> { + // CHECK: xla_hlo.constant {value = dense<0> : tensor<2xi32>} : tensor<*xi32> + %0 = "tf.Const"() {value = dense<0> : tensor<2xi32>} : () -> (tensor<*xi32>) + return %0: tensor<*xi32> +} + // CHECK-LABEL: @opaque_const func @opaque_const() -> tensor>> { // CHECK-NOT: xla_hlo.constant @@ -838,13 +852,14 @@ func @relu6(%arg0: tensor<1xi32>) -> tensor<1xi32> { } // CHECK-LABEL: func @relu_grad -// CHECK-SAME: (%[[GRADIENTS:.*]]: tensor<4x8xf32>, %[[FEATURES:.*]]: tensor<4x8xf32>) -func @relu_grad(%gradients: tensor<4x8xf32>, %features: tensor<4x8xf32>) -> tensor<4x8xf32> { - // CHECK: %[[ZERO:.*]] = xla_hlo.constant dense<0.000000e+00> : tensor<4x8xf32> - // CHECK: %[[PRED:.*]] = "xla_hlo.compare"(%[[FEATURES]], %[[ZERO]]) {comparison_direction = "GT"} : (tensor<4x8xf32>, tensor<4x8xf32>) -> tensor<4x8xi1> - // CHECK: %[[RESULT:.*]] = "xla_hlo.select"(%[[PRED]], %[[GRADIENTS]], %[[ZERO]]) : (tensor<4x8xi1>, tensor<4x8xf32>, tensor<4x8xf32>) -> tensor<4x8xf32> - // CHECK: return %[[RESULT]] : tensor<4x8xf32> - %2 = "tf.ReluGrad"(%gradients, %features) : (tensor<4x8xf32>, tensor<4x8xf32>) -> tensor<4x8xf32> +// CHECK-SAME: (%[[GRADIENTS:.*]]: tensor<4x8xf32>, %[[FEATURES:.*]]: tensor) +func @relu_grad(%gradients: tensor<4x8xf32>, %features: tensor) -> tensor<4x8xf32> { + // CHECK-DAG: %[[ZERO_SCALAR:.*]] = xla_hlo.constant dense<0.000000e+00> : tensor + // CHECK-DAG: %[[ZERO:.*]] = xla_hlo.constant dense<0.000000e+00> : tensor<4x8xf32> + // CHECK-DAG: %[[PRED:.*]] = "xla_hlo.compare"(%[[FEATURES]], %[[ZERO_SCALAR]]) {comparison_direction = "GT"} : (tensor, tensor) -> tensor<*xi1> + // CHECK-DAG: %[[RESULT:.*]] = "xla_hlo.select"(%[[PRED]], %[[GRADIENTS]], %[[ZERO]]) : (tensor<*xi1>, tensor<4x8xf32>, tensor<4x8xf32>) -> tensor<4x8xf32> + // CHECK-DAG: return %[[RESULT]] : tensor<4x8xf32> + %2 = "tf.ReluGrad"(%gradients, %features) : (tensor<4x8xf32>, tensor) -> tensor<4x8xf32> return %2 : tensor<4x8xf32> } @@ -1019,6 +1034,14 @@ func @transpose_2d(%arg0: tensor<2x3xf32>) -> tensor<3x2xf32> { return %0 : tensor<3x2xf32> } +// CHECK-LABEL: @transpose_3d_int32 +func @transpose_3d_int32(%arg0: tensor<1x2x3xf32>) -> tensor<3x2x1xf32> { + %permutation = "tf.Const"() {value = dense<[2, 1, 0]> : tensor<3xi32>} : () -> 
(tensor<3xi32>) + // CHECK: "xla_hlo.transpose" + %0 = "tf.Transpose"(%arg0, %permutation) : (tensor<1x2x3xf32>, tensor<3xi32>) -> tensor<3x2x1xf32> + return %0 : tensor<3x2x1xf32> +} + // CHECK-LABEL: @transpose_3d func @transpose_3d(%arg0: tensor<1x2x3xf32>) -> tensor<3x2x1xf32> { %permutation = "tf.Const"() {value = dense<[2, 1, 0]> : tensor<3xi64>} : () -> (tensor<3xi64>) @@ -1344,35 +1367,42 @@ func @tanh_unranked(%arg0: tensor<*xf32>) -> tensor<*xf32> { // CHECK-LABEL: reshape func @reshape(%arg0: tensor<2xf32>, %arg1: tensor<2xi32>) -> tensor<1x1xf32> { - // CHECK: %0 = "xla_hlo.reshape"(%arg0) : (tensor<2xf32>) -> tensor<1x1xf32> + // CHECK: "xla_hlo.reshape" %0 = "tf.Reshape"(%arg0, %arg1) : (tensor<2xf32>, tensor<2xi32>) -> tensor<1x1xf32> return %0 : tensor<1x1xf32> } // CHECK-LABEL: reshape_dynamic -func @reshape_dynamic(%arg0: tensor<*xf32>, %arg1: tensor<2xi32>) -> tensor { - // CHECK: %0 = "tf.Reshape"(%arg0, %arg1) : (tensor<*xf32>, tensor<2xi32>) -> tensor +func @reshape_dynamic(%arg0: tensor, %arg1: tensor<2xi32>) -> tensor<1x1xf32> { + // CHECK: "xla_hlo.reshape" + %0 = "tf.Reshape"(%arg0, %arg1) : (tensor, tensor<2xi32>) -> tensor<1x1xf32> + return %0 : tensor<1x1xf32> +} + +// CHECK-LABEL: reshape_unranked +func @reshape_unranked(%arg0: tensor<*xf32>, %arg1: tensor<2xi32>) -> tensor { + // CHECK: "tf.Reshape" %0 = "tf.Reshape"(%arg0, %arg1) : (tensor<*xf32>, tensor<2xi32>) -> tensor return %0 : tensor } // CHECK-LABEL: squeeze func @squeeze(%arg0: tensor<1x1x10xf32>) -> tensor<1x10xf32> { - // CHECK-NEXT: %0 = "xla_hlo.reshape"(%arg0) : (tensor<1x1x10xf32>) -> tensor<1x10xf32> + // CHECK: "xla_hlo.reshape" %0 = "tf.Squeeze"(%arg0) : (tensor<1x1x10xf32>) -> tensor<1x10xf32> return %0 : tensor<1x10xf32> } // CHECK-LABEL: squeeze_dynamic func @squeeze_dynamic(%arg0: tensor) -> tensor<*xf32> { - // CHECK-NEXT: %0 = "tf.Squeeze"(%arg0) : (tensor) -> tensor<*xf32> + // CHECK: "tf.Squeeze" %0 = "tf.Squeeze"(%arg0) : (tensor) -> tensor<*xf32> return %0 : tensor<*xf32> } // CHECK-LABEL: expand_dims func @expand_dims(%arg0: tensor<2xf32>, %axis: tensor) -> tensor<1x2xf32> { - // CHECK: "xla_hlo.reshape"{{.*}} : (tensor<2xf32>) -> tensor<1x2xf32> + // CHECK: "xla_hlo.reshape" %0 = "tf.ExpandDims"(%arg0, %axis) : (tensor<2xf32>, tensor) -> tensor<1x2xf32> return %0 : tensor<1x2xf32> } @@ -1380,7 +1410,8 @@ func @expand_dims(%arg0: tensor<2xf32>, %axis: tensor) -> tensor<1x2xf32> { // CHECK-LABEL: slice_constant_start func @slice_constant_start(%arg0: tensor<4xi32>) -> tensor<2xi32> { // CHECK: %[[START:.*]] = xla_hlo.constant dense<1> : tensor<1xi64> - // CHECK: %[[RESULT:.*]] = "xla_hlo.dynamic-slice"(%arg0, %[[START]]) {slice_sizes = dense<2> : tensor<1xi64>} : (tensor<4xi32>, tensor<1xi64>) -> tensor<2xi32> + // CHECK: %[[START_I64:.*]] = "xla_hlo.convert"(%[[START]]) : (tensor<1xi64>) -> tensor<1xi64> + // CHECK: %[[RESULT:.*]] = "xla_hlo.dynamic-slice"(%arg0, %[[START_I64]]) {slice_sizes = dense<2> : tensor<1xi64>} : (tensor<4xi32>, tensor<1xi64>) -> tensor<2xi32> // CHECK: return %[[RESULT]] : tensor<2xi32> %starts = "tf.Const"() {value = dense<[1]> : tensor<1xi64>} : () -> (tensor<1xi64>) %sizes = "tf.Const"() {value = dense<[2]> : tensor<1xi64>} : () -> (tensor<1xi64>) @@ -1388,10 +1419,22 @@ func @slice_constant_start(%arg0: tensor<4xi32>) -> tensor<2xi32> { return %0 : tensor<2xi32> } +// CHECK-LABEL: slice_i32_consts +func @slice_i32_consts(%arg0: tensor<4xi32>) -> tensor<2xi32> { + // CHECK: %[[START:.*]] = xla_hlo.constant dense<1> : tensor<1xi32> + // CHECK: 
%[[START_I64:.*]] = "xla_hlo.convert"(%[[START]]) : (tensor<1xi32>) -> tensor<1xi64> + // CHECK: slice_sizes = dense<2> : tensor<1xi64> + %starts = "tf.Const"() {value = dense<[1]> : tensor<1xi32>} : () -> (tensor<1xi32>) + %sizes = "tf.Const"() {value = dense<[2]> : tensor<1xi32>} : () -> (tensor<1xi32>) + %0 = "tf.Slice"(%arg0, %starts, %sizes) : (tensor<4xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2xi32> + return %0 : tensor<2xi32> +} + // CHECK-LABEL: slice_constant_start_negative_one_size func @slice_constant_start_negative_one_size(%arg0: tensor<4xi32>) -> tensor<3xi32> { // CHECK: %[[START:.*]] = xla_hlo.constant dense<1> : tensor<1xi64> - // CHECK: %[[RESULT:.*]] = "xla_hlo.dynamic-slice"(%arg0, %[[START]]) {slice_sizes = dense<3> : tensor<1xi64>} : (tensor<4xi32>, tensor<1xi64>) -> tensor<3xi32> + // CHECK: %[[START_I64:.*]] = "xla_hlo.convert"(%[[START]]) : (tensor<1xi64>) -> tensor<1xi64> + // CHECK: %[[RESULT:.*]] = "xla_hlo.dynamic-slice"(%arg0, %[[START_I64]]) {slice_sizes = dense<3> : tensor<1xi64>} : (tensor<4xi32>, tensor<1xi64>) -> tensor<3xi32> // CHECK: return %[[RESULT]] : tensor<3xi32> %starts = "tf.Const"() {value = dense<[1]> : tensor<1xi64>} : () -> (tensor<1xi64>) %sizes = "tf.Const"() {value = dense<[-1]> : tensor<1xi64>} : () -> (tensor<1xi64>) @@ -1402,7 +1445,8 @@ func @slice_constant_start_negative_one_size(%arg0: tensor<4xi32>) -> tensor<3xi // CHECK-LABEL: slice_constant_start_dynamic_shape func @slice_constant_start_dynamic_shape(%arg0: tensor, %arg1: tensor<2xi64>) -> tensor<1x4xi32> { // CHECK: %[[START:.*]] = xla_hlo.constant dense<[1, 0]> : tensor<2xi64> - // CHECK: %[[RESULT:.*]] = "xla_hlo.dynamic-slice"(%arg0, %[[START]]) {slice_sizes = dense<[1, 4]> : tensor<2xi64>} : (tensor, tensor<2xi64>) -> tensor<1x4xi32> + // CHECK: %[[START_I64:.*]] = "xla_hlo.convert"(%[[START]]) : (tensor<2xi64>) -> tensor<2xi64> + // CHECK: %[[RESULT:.*]] = "xla_hlo.dynamic-slice"(%arg0, %[[START_I64]]) {slice_sizes = dense<[1, 4]> : tensor<2xi64>} : (tensor, tensor<2xi64>) -> tensor<1x4xi32> // CHECK: return %[[RESULT]] : tensor<1x4xi32> %starts = "tf.Const"() {value = dense<[1, 0]> : tensor<2xi64>} : () -> (tensor<2xi64>) %sizes = "tf.Const"() {value = dense<[1, 4]> : tensor<2xi64>} : () -> (tensor<2xi64>) @@ -1412,7 +1456,8 @@ func @slice_constant_start_dynamic_shape(%arg0: tensor, %arg1: tensor<2 // CHECK-LABEL: slice_variable_start func @slice_variable_start(%arg0: tensor<3x4xi32>, %arg1: tensor<2xi64>) -> tensor<1x4xi32> { - // CHECK: %[[RESULT:.*]] = "xla_hlo.dynamic-slice"(%arg0, %arg1) {slice_sizes = dense<[1, 4]> : tensor<2xi64>} : (tensor<3x4xi32>, tensor<2xi64>) -> tensor<1x4xi32> + // CHECK: %[[START_I64:.*]] = "xla_hlo.convert"(%arg1) : (tensor<2xi64>) -> tensor<2xi64> + // CHECK: %[[RESULT:.*]] = "xla_hlo.dynamic-slice"(%arg0, %[[START_I64]]) {slice_sizes = dense<[1, 4]> : tensor<2xi64>} : (tensor<3x4xi32>, tensor<2xi64>) -> tensor<1x4xi32> // CHECK: return %[[RESULT]] : tensor<1x4xi32> %sizes = "tf.Const"() {value = dense<[1, 4]> : tensor<2xi64>} : () -> (tensor<2xi64>) %0 = "tf.Slice"(%arg0, %arg1, %sizes) : (tensor<3x4xi32>, tensor<2xi64>, tensor<2xi64>) -> tensor<1x4xi32> @@ -1525,6 +1570,16 @@ func @mean(%arg0: tensor<4x8xf16>) -> tensor<4x1xf16> { return %0 : tensor<4x1xf16> } +// CHECK-LABEL: func @mean_scalar_dim +func @mean_scalar_dim(%arg0: tensor<4x8xf16>) -> tensor<4x1xf16> { + // Verify that tf.Mean op with scalar attributes are lowered successfully. 
+ + // CHECK-NOT: tf.Mean + %dimension = "tf.Const"() { value = dense<1> : tensor } : () -> tensor + %0 = "tf.Mean"(%arg0, %dimension) { keep_dims = true }: (tensor<4x8xf16>, tensor) -> tensor<4x1xf16> + return %0 : tensor<4x1xf16> +} + // CHECK-LABEL: func @mean_dynamic func @mean_dynamic(%arg0: tensor<4x?xf16>) -> tensor<4x1xf16> { %dimension = "tf.Const"() { value = dense<1> : tensor<1xi64> } : () -> tensor<1xi64> @@ -1601,6 +1656,66 @@ func @max_dynamic(%arg0: tensor<4x?xf16>) -> tensor<4x1xf16> { return %0 : tensor<4x1xf16> } +// CHECK-LABEL: @all +func @all(%input: tensor<4x8xi1>) -> tensor<4xi1> { + %dims = "tf.Const"() { value = dense<1> : tensor<1xi32>} : () -> tensor<1xi32> + // CHECK: %[[INIT:.*]] = xla_hlo.constant dense : tensor + // CHECK: "xla_hlo.reduce"(%{{.*}}, %[[INIT]]) ( { + // CHECK: ^{{.*}}(%[[ARGA:.*]]: tensor, %[[ARGB:.*]]: tensor): + // CHECK: %[[AND:.*]] = xla_hlo.and %[[ARGA]], %[[ARGB]] : tensor + // CHECK: "xla_hlo.return"(%[[AND]]) : (tensor) -> () + // CHECK: }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<4x8xi1>, tensor) -> tensor<4xi1> + %0 = "tf.All"(%input, %dims) : (tensor<4x8xi1>, tensor<1xi32>) -> tensor<4xi1> + return %0 : tensor<4xi1> +} + +// CHECK-LABEL: @all_keep_dim +func @all_keep_dim(%input: tensor<4x8xi1>) -> tensor<4x1xi1> { + // CHECK: "xla_hlo.reshape"(%{{.*}}) : (tensor<4xi1>) -> tensor<4x1xi1> + %dims = "tf.Const"() { value = dense<1> : tensor<1xi32>} : () -> tensor<1xi32> + %0 = "tf.All"(%input, %dims) {keep_dims = true} : (tensor<4x8xi1>, tensor<1xi32>) -> tensor<4x1xi1> + return %0 : tensor<4x1xi1> +} + +// CHECk-LABEL: @all_dynamic +func @all_dynamic(%input: tensor<4x?xi1>) -> tensor<4x1xi1> { + %dims = "tf.Const"() { value = dense<1> : tensor<1xi32>} : () -> tensor<1xi32> + // CHECK: %[[ARG:.*]] = "xla_hlo.convert"(%{{.*}}) : (tensor<4x?xi1>) -> tensor<4x?xi1> + // CHECK: "xla_hlo.reduce"(%[[ARG]] + %0 = "tf.All"(%input, %dims) {keep_dims = true} : (tensor<4x?xi1>, tensor<1xi32>) -> tensor<4x1xi1> + return %0 : tensor<4x1xi1> +} + +// CHECK-LABEL: @any +func @any(%input: tensor<4x8xi1>) -> tensor<4xi1> { + %dims = "tf.Const"() { value = dense<1> : tensor<1xi32>} : () -> tensor<1xi32> + // CHECK: %[[INIT:.*]] = xla_hlo.constant dense : tensor + // CHECK: "xla_hlo.reduce"(%{{.*}}, %[[INIT]]) ( { + // CHECK: ^{{.*}}(%[[ARGA:.*]]: tensor, %[[ARGB:.*]]: tensor): + // CHECK: %[[AND:.*]] = xla_hlo.or %[[ARGA]], %[[ARGB]] : tensor + // CHECK: "xla_hlo.return"(%[[AND]]) : (tensor) -> () + // CHECK: }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<4x8xi1>, tensor) -> tensor<4xi1> + %0 = "tf.Any"(%input, %dims) : (tensor<4x8xi1>, tensor<1xi32>) -> tensor<4xi1> + return %0 : tensor<4xi1> +} + +// CHECK-LABEL: @any_keep_dim +func @any_keep_dim(%input: tensor<4x8xi1>) -> tensor<4x1xi1> { + // CHECK: "xla_hlo.reshape"(%{{.*}}) : (tensor<4xi1>) -> tensor<4x1xi1> + %dims = "tf.Const"() { value = dense<1> : tensor<1xi32>} : () -> tensor<1xi32> + %0 = "tf.Any"(%input, %dims) {keep_dims = true} : (tensor<4x8xi1>, tensor<1xi32>) -> tensor<4x1xi1> + return %0 : tensor<4x1xi1> +} + +// CHECk-LABEL: @any_dynamic +func @any_dynamic(%input: tensor<4x?xi1>) -> tensor<4x1xi1> { + %dims = "tf.Const"() { value = dense<1> : tensor<1xi32>} : () -> tensor<1xi32> + // CHECK: %[[ARG:.*]] = "xla_hlo.convert"(%{{.*}}) : (tensor<4x?xi1>) -> tensor<4x?xi1> + // CHECK: "xla_hlo.reduce"(%[[ARG]] + %0 = "tf.Any"(%input, %dims) {keep_dims = true} : (tensor<4x?xi1>, tensor<1xi32>) -> tensor<4x1xi1> + return %0 : tensor<4x1xi1> +} + 
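// Rough sketch of the reduction lowering exercised by the tests above
// (illustrative only, not a FileCheck'd case): a keep_dims tf.Any over axis 1
// of a tensor<4x8xi1> is expected to expand to approximately
//
//   %init = xla_hlo.constant dense<false> : tensor<i1>
//   %red = "xla_hlo.reduce"(%input, %init) ( {
//   ^bb0(%lhs: tensor<i1>, %rhs: tensor<i1>):
//     %or = xla_hlo.or %lhs, %rhs : tensor<i1>
//     "xla_hlo.return"(%or) : (tensor<i1>) -> ()
//   }) {dimensions = dense<1> : tensor<1xi64>} : (tensor<4x8xi1>, tensor<i1>) -> tensor<4xi1>
//   %res = "xla_hlo.reshape"(%red) : (tensor<4xi1>) -> tensor<4x1xi1>
//
// tf.All follows the same pattern with xla_hlo.and and an init of dense<true>.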
//===----------------------------------------------------------------------===// // Tile op legalizations. //===----------------------------------------------------------------------===// @@ -1924,12 +2039,23 @@ func @split_match_and_split_into_two(%input: tensor<4x6xf32>) -> (tensor<2x6xf32 return %0#0, %0#1 : tensor<2x6xf32>, tensor<2x6xf32> } +// CHECK-LABEL: @split_match_and_split_into_two_dynamic +func @split_match_and_split_into_two_dynamic(%input: tensor<4x?xf32>) -> (tensor<2x?xf32>, tensor<2x?xf32>) { + %cst = "tf.Const"() {value = dense<0> : tensor} : () -> tensor + // CHECK: %[[ONE:.*]] = "xla_hlo.slice"(%{{.*}}) {limit_indices = dense<[2, -1]> : tensor<2xi64>, start_indices = dense<0> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor<4x?xf32>) -> tensor<2x?xf32> + // CHECK: %[[TWO:.*]] = "xla_hlo.slice"(%{{.*}}) {limit_indices = dense<[4, -1]> : tensor<2xi64>, start_indices = dense<[2, 0]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor<4x?xf32>) -> tensor<2x?xf32> + %0:2 = "tf.Split"(%cst, %input) : (tensor, tensor<4x?xf32>) -> (tensor<2x?xf32>, tensor<2x?xf32>) + // CHECK: return %[[ONE]], %[[TWO]] + return %0#0, %0#1 : tensor<2x?xf32>, tensor<2x?xf32> +} + // CHECK-LABEL: @split_match_and_split_into_three +// CHECK-SAME: (%[[ARG:.*]]: tensor<4x6xf32>) func @split_match_and_split_into_three(%input: tensor<4x6xf32>) -> (tensor<4x2xf32>, tensor<4x2xf32>, tensor<4x2xf32>) { %cst = "tf.Const"() {value = dense<1> : tensor} : () -> tensor - // CHECK: %[[ONE:.*]] = "xla_hlo.slice"(%arg0) {limit_indices = dense<[4, 2]> : tensor<2xi64>, start_indices = dense<0> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor<4x6xf32>) -> tensor<4x2xf32> - // CHECK: %[[TWO:.*]] = "xla_hlo.slice"(%arg0) {limit_indices = dense<4> : tensor<2xi64>, start_indices = dense<[0, 2]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor<4x6xf32>) -> tensor<4x2xf32> - // CHECK: %[[THREE:.*]] = "xla_hlo.slice"(%arg0) {limit_indices = dense<[4, 6]> : tensor<2xi64>, start_indices = dense<[0, 4]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor<4x6xf32>) -> tensor<4x2xf32> + // CHECK: %[[ONE:.*]] = "xla_hlo.slice"(%[[ARG]]) {limit_indices = dense<[4, 2]> : tensor<2xi64>, start_indices = dense<0> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor<4x6xf32>) -> tensor<4x2xf32> + // CHECK: %[[TWO:.*]] = "xla_hlo.slice"(%[[ARG]]) {limit_indices = dense<4> : tensor<2xi64>, start_indices = dense<[0, 2]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor<4x6xf32>) -> tensor<4x2xf32> + // CHECK: %[[THREE:.*]] = "xla_hlo.slice"(%[[ARG]]) {limit_indices = dense<[4, 6]> : tensor<2xi64>, start_indices = dense<[0, 4]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor<4x6xf32>) -> tensor<4x2xf32> %0:3 = "tf.Split"(%cst, %input) : (tensor, tensor<4x6xf32>) -> (tensor<4x2xf32>, tensor<4x2xf32>, tensor<4x2xf32>) // CHECK: return %[[ONE]], %[[TWO]], %[[THREE]] return %0#0, %0#1, %0#2 : tensor<4x2xf32>, tensor<4x2xf32>, tensor<4x2xf32> @@ -1973,3 +2099,82 @@ func @topk_v2(%input: tensor<16x16xf32>) -> (tensor<16x8xf32>, tensor<16x8xi32>) %0:2 = "tf.TopKV2"(%input, %k): (tensor<16x16xf32>, tensor) -> (tensor<16x8xf32>, tensor<16x8xi32>) return %0#0, %0#1: tensor<16x8xf32>, tensor<16x8xi32> } + +//===----------------------------------------------------------------------===// +// tf.SplitV legalization +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: @splitv_match_and_split_into_three 
+// CHECK-SAME: (%[[ARG:.*]]: tensor<4x6xf32>) +func @splitv_match_and_split_into_three(%input: tensor<4x6xf32>) -> (tensor<4x1xf32>, tensor<4x2xf32>, tensor<4x3xf32>) { + %split_sizes = "tf.Const"() {value = dense<[1, 2, 3]> : tensor<3xi32>} : () -> tensor<3xi32> + %split_dim = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + // CHECK: %[[ONE:.*]] = "xla_hlo.slice"(%[[ARG]]) {limit_indices = dense<[4, 1]> : tensor<2xi64>, start_indices = dense<0> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor<4x6xf32>) -> tensor<4x1xf32> + // CHECK: %[[TWO:.*]] = "xla_hlo.slice"(%[[ARG]]) {limit_indices = dense<[4, 3]> : tensor<2xi64>, start_indices = dense<[0, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor<4x6xf32>) -> tensor<4x2xf32> + // CHECK: %[[THREE:.*]] = "xla_hlo.slice"(%[[ARG]]) {limit_indices = dense<[4, 6]> : tensor<2xi64>, start_indices = dense<[0, 3]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor<4x6xf32>) -> tensor<4x3xf32> + %0:3 = "tf.SplitV"(%input, %split_sizes, %split_dim) : (tensor<4x6xf32>, tensor<3xi32>, tensor) -> (tensor<4x1xf32>, tensor<4x2xf32>, tensor<4x3xf32>) + // CHECK: return %[[ONE]], %[[TWO]], %[[THREE]] + return %0#0, %0#1, %0#2 : tensor<4x1xf32>, tensor<4x2xf32>, tensor<4x3xf32> +} + +// CHECK-LABEL: @splitv_match_and_split_into_three_dynamic +func @splitv_match_and_split_into_three_dynamic(%input: tensor) -> (tensor, tensor, tensor) { + %split_sizes = "tf.Const"() {value = dense<[1, 2, 3]> : tensor<3xi32>} : () -> tensor<3xi32> + %split_dim = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + // CHECK: "xla_hlo.slice"(%{{.*}}) {limit_indices = dense<[-1, 1]> : tensor<2xi64>, start_indices = dense<0> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor) -> tensor + // CHECK: "xla_hlo.slice"(%{{.*}}) {limit_indices = dense<[-1, 3]> : tensor<2xi64>, start_indices = dense<[0, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor) -> tensor + // CHECK: "xla_hlo.slice"(%{{.*}}) {limit_indices = dense<[-1, 6]> : tensor<2xi64>, start_indices = dense<[0, 3]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} : (tensor) -> tensor + %0:3 = "tf.SplitV"(%input, %split_sizes, %split_dim) : (tensor, tensor<3xi32>, tensor) -> (tensor, tensor, tensor) + return %0#0, %0#1, %0#2 : tensor, tensor, tensor +} + +// CHECK-LABEL: @splitv_dynamic_dim_in_split_sizes +func @splitv_dynamic_dim_in_split_sizes(%input: tensor<4x6xf32>) -> (tensor<4x1xf32>, tensor<4x2xf32>, tensor<4x3xf32>) { + %split_sizes = "tf.Const"() {value = dense<[1, -1, 3]> : tensor<3xi32>} : () -> tensor<3xi32> + %split_dim = "tf.Const"() {value = dense<1> : tensor} : () -> tensor + // CHECK: limit_indices = dense<[4, 1]> : tensor<2xi64>, start_indices = dense<0> : tensor<2xi64> + // CHECK: limit_indices = dense<[4, 3]> : tensor<2xi64>, start_indices = dense<[0, 1]> : tensor<2xi64> + // CHECK: limit_indices = dense<[4, 6]> : tensor<2xi64>, start_indices = dense<[0, 3]> : tensor<2xi64> + %0:3 = "tf.SplitV"(%input, %split_sizes, %split_dim) : (tensor<4x6xf32>, tensor<3xi32>, tensor) -> (tensor<4x1xf32>, tensor<4x2xf32>, tensor<4x3xf32>) + return %0#0, %0#1, %0#2 : tensor<4x1xf32>, tensor<4x2xf32>, tensor<4x3xf32> +} + +//===----------------------------------------------------------------------===// +// tf.Assert legalization +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: @assert +func @assert(%arg0: tensor, %arg1: tensor<*xf32>) { + // CHECK-NOT: tf.Assert + "tf.Assert"(%arg0, 
%arg1) {summarize = 1} : (tensor, tensor<*xf32>) -> () + return +} + +//===----------------------------------------------------------------------===// +// tf.Unpack legalization +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: @unpack +func @unpack(%input: tensor<4x3x6xf32>) -> (tensor<4x?xf32>, tensor<4x6xf32>, tensor<4x6xf32>) { + // CHECK: %[[SLICE1:.*]] = "xla_hlo.slice"(%{{.*}}) {limit_indices = dense<[4, 1, 6]> : tensor<3xi64>, start_indices = dense<0> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} : (tensor<4x3x6xf32>) -> tensor<4x1x6xf32> + // CHECK: %[[RES1:.*]] = "xla_hlo.reshape"(%[[SLICE1]]) : (tensor<4x1x6xf32>) -> tensor<4x?xf32> + // CHECK: %[[SLICE2:.*]] = "xla_hlo.slice"(%{{.*}}) {limit_indices = dense<[4, 2, 6]> : tensor<3xi64>, start_indices = dense<[0, 1, 0]> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} : (tensor<4x3x6xf32>) -> tensor<4x1x6xf32> + // CHECK: %[[RES2:.*]] = "xla_hlo.reshape"(%[[SLICE2]]) : (tensor<4x1x6xf32>) -> tensor<4x6xf32> + // CHECK: %[[SLICE3:.*]] = "xla_hlo.slice"(%{{.*}}) {limit_indices = dense<[4, 3, 6]> : tensor<3xi64>, start_indices = dense<[0, 2, 0]> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} : (tensor<4x3x6xf32>) -> tensor<4x1x6xf32> + // CHECK: %[[RES3:.*]] = "xla_hlo.reshape"(%[[SLICE3]]) : (tensor<4x1x6xf32>) -> tensor<4x6xf32> + + %0:3 = "tf.Unpack"(%input) {axis = 1} : (tensor<4x3x6xf32>) -> (tensor<4x?xf32>, tensor<4x6xf32>, tensor<4x6xf32>) + // return %[[RES1]], %[[RES2]], %[[RES3]] + return %0#0, %0#1, %0#2 : tensor<4x?xf32>, tensor<4x6xf32>, tensor<4x6xf32> +} + +// CHECK-LABEL: @unpack_dynamic +func @unpack_dynamic(%input: tensor) -> (tensor, tensor) { + // CHECK: %[[SLICE1:.*]] = "xla_hlo.slice"(%{{.*}}) {limit_indices = dense<[-1, -1, 1]> : tensor<3xi64>, start_indices = dense<0> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} : (tensor) -> tensor + // CHECK: "xla_hlo.reshape"(%[[SLICE1]]) : (tensor) -> tensor + // CHECK: %[[SLICE2:.*]] = "xla_hlo.slice"(%{{.*}}) {limit_indices = dense<[-1, -1, 2]> : tensor<3xi64>, start_indices = dense<[0, 0, 1]> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} : (tensor) -> tensor + // CHECK: "xla_hlo.reshape"(%[[SLICE2]]) : (tensor) -> tensor + + %0:2 = "tf.Unpack"(%input) {axis = -1} : (tensor) -> (tensor, tensor) + return %0#0, %0#1 : tensor, tensor +} diff --git a/tensorflow/compiler/mlir/xla/tests/translate/export.mlir b/tensorflow/compiler/mlir/xla/tests/translate/export.mlir index ffcc1cc9df3..85ed317f8c6 100644 --- a/tensorflow/compiler/mlir/xla/tests/translate/export.mlir +++ b/tensorflow/compiler/mlir/xla/tests/translate/export.mlir @@ -317,16 +317,33 @@ func @main(%arg0: tensor<10xf32>) -> tensor<10xf32> { // ----- -// CHECK-LABEL: HloModule +// CHECK-LABEL: HloModule func @main(%arg0: tensor<3x4xi32>, %arg1: tensor<4x5xi32>) -> tensor<3x5xi32> { // Simple einsum is lowered to HLO dot op. 
- // CHECK: dot(s32[3,4] %{{.*}}, s32[4,5] %{{.*}}), lhs_contracting_dims={1}, rhs_contracting_dims={0} + // CHECK: dot(s32[3,4] %{{.*}}, s32[4,5] %{{.*}}), lhs_contracting_dims={1}, rhs_contracting_dims={0} %0 = "xla_hlo.einsum"(%arg0, %arg1) {einsum_config = "ab,bc->ac"} : (tensor<3x4xi32>, tensor<4x5xi32>) -> tensor<3x5xi32> return %0 : tensor<3x5xi32> } // ----- +// CHECK-LABEL: HloModule +func @main(%arg0: tensor<200x100x300xf32>, %arg1: tensor<10x2xi32>) -> tensor<10x300xf32> { + // CHECK: [[ARG0:%.*]] = f32[200,100,300] parameter(0) + // CHECK: [[ARG1:%.*]] = s32[10,2] parameter(1) + // CHECK: f32[10,300] gather(f32[200,100,300] [[ARG0]], s32[10,2] [[ARG1]]) + // CHECK-SAME: offset_dims={1} + // CHECK-SAME: collapsed_slice_dims={0,1} + // CHECK-SAME: start_index_map={0,1} + // CHECK-SAME: index_vector_dim=1 + // CHECK-SAME: slice_sizes={1,1,300} + // CHECK-SAME: indices_are_sorted=true + %0 = "xla_hlo.gather"(%arg0, %arg1) {dimension_numbers = {collapsed_slice_dims = dense<[0, 1]> : tensor<2xi64>, index_vector_dim = 1 : i64, offset_dims = dense<1> : tensor<1xi64>, start_index_map = dense<[0, 1]> : tensor<2xi64>}, indices_are_sorted = true, name = "gather", slice_sizes = dense<[1, 1, 300]> : tensor<3xi64>} : (tensor<200x100x300xf32>, tensor<10x2xi32>) -> tensor<10x300xf32> + return %0 : tensor<10x300xf32> +} + +// ----- + // CHECK-LABEL: HloModule func @main(%arg: tensor<4x2xf32>) -> tensor { %0 = "xla_hlo.get_dimension_size"(%arg) {dimension = 1 : i32} : (tensor<4x2xf32>) -> tensor diff --git a/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt b/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt index 77d74253132..a68e0237b14 100644 --- a/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt +++ b/tensorflow/compiler/mlir/xla/tests/translate/import.hlotxt @@ -317,6 +317,28 @@ ENTRY %dummy_main (Arg_0.1: f32[]) -> f32[] { ROOT %floor.2 = f32[16] floor(f32[16] %arg0.1) } +// CHECK-LABEL: func @test_gather( +// CHECK-SAME: [[ARG0:%.+]]: tensor<200x100x300xf32>, [[ARG1:%.+]]: tensor<10x2xi32>) -> tensor<10x300xf32> { +%test_gather (arg.0: f32[200,100,300], arg.1: s32[10,2]) -> f32[10,300] { + %arg.0 = f32[200,100,300] parameter(0) + %arg.1 = s32[10,2] parameter(1) + // CHECK: "xla_hlo.gather"([[ARG0]], [[ARG1]]) + // CHECK-SAME: dimension_numbers + // CHECK-SAME: collapsed_slice_dims = dense<[0, 1]> : tensor<2xi64> + // CHECK-SAME: index_vector_dim = 1 : i64 + // CHECK-SAME: offset_dims = dense<1> : tensor<1xi64> + // CHECK-SAME: start_index_map = dense<[0, 1]> : tensor<2xi64> + // CHECK-SAME: indices_are_sorted = true + // CHECK-SAME: slice_sizes = dense<[1, 1, 300]> : tensor<3xi64> + ROOT gather = f32[10,300] gather(f32[200,100,300] %arg.0, s32[10,2] %arg.1), + collapsed_slice_dims={0,1}, + index_vector_dim=1, + offset_dims={1}, + start_index_map={0,1}, + indices_are_sorted=true, + slice_sizes={1,1,300} +} + // CHECK-LABEL: func @test_get_dimension_size // CHECK-SAME: ([[ARG:%.*]]: tensor<4x2xf32>) %test_get_dimension_size (Arg_0.1: f32[4,2]) -> s32[] { diff --git a/tensorflow/compiler/mlir/xla/transforms/hlo_legalize_to_lhlo.cc b/tensorflow/compiler/mlir/xla/transforms/hlo_legalize_to_lhlo.cc index 58d5b7aa02b..4a74fe4b2ae 100644 --- a/tensorflow/compiler/mlir/xla/transforms/hlo_legalize_to_lhlo.cc +++ b/tensorflow/compiler/mlir/xla/transforms/hlo_legalize_to_lhlo.cc @@ -18,6 +18,7 @@ limitations under the License. 
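// Overview of the reduce conversion added below (an illustrative sketch that
// assumes the element-wise body ops are handled by the other patterns in this
// file; it is not verbatim test output): a reduction body of the form
//
//   ^bb0(%lhs: tensor<f32>, %rhs: tensor<f32>):
//     %sum = xla_hlo.add %lhs, %rhs : tensor<f32>
//     "xla_hlo.return"(%sum) : (tensor<f32>) -> ()
//
// is rewritten so that the entry block takes buffer arguments instead of
// tensors, with one extra trailing argument for the result buffer, roughly
//
//   ^bb0(%lhs: memref<f32>, %rhs: memref<f32>, %out: memref<f32>):
//     ... converted body ..., followed by the LHLO terminator
//
// which is what HloToLHloReduceConverter below implements.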
#include "absl/memory/memory.h" #include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir #include "mlir/IR/Attributes.h" // TF:local_config_mlir +#include "mlir/IR/BlockAndValueMapping.h" // TF:local_config_mlir #include "mlir/IR/Builders.h" // TF:local_config_mlir #include "mlir/IR/Function.h" // TF:local_config_mlir #include "mlir/IR/Location.h" // TF:local_config_mlir @@ -38,13 +39,19 @@ namespace { constexpr StringRef kTempBufferAttr = "temp"; -Value* GetTensorStoreMemRef(Value* value) { +Value* GetTensorStoreOrReturnMemRef(Value* value) { for (const auto& user : value->getUsers()) { if (auto tensor_store = dyn_cast(user)) { if (tensor_store.getOperand(0) == value) { return tensor_store.getOperand(1); } } + if (auto return_op = dyn_cast(user)) { + if (return_op.getOperand(0) == value) { + auto block = return_op.getOperation()->getBlock(); + return *block->args_rbegin(); + } + } } return nullptr; } @@ -88,8 +95,8 @@ Value* InsertAllocAndDealloc(Location loc, Value* result, /// function to store that values held in the tensor. Value* GetBufferForResultValue(Location loc, Value* result, ConversionPatternRewriter* rewriter) { - if (auto tensor_store_memref = GetTensorStoreMemRef(result)) { - return tensor_store_memref; + if (auto existing_memref = GetTensorStoreOrReturnMemRef(result)) { + return existing_memref; } return InsertAllocAndDealloc(loc, result, rewriter); } @@ -117,7 +124,63 @@ class HloToLhloOpConverter : public ConversionPattern { rewriter.create(op->getLoc(), llvm::None, buffer_args, op->getAttrs()); rewriter.replaceOp(op, ArrayRef(buffer_args).slice(operands.size()), - llvm::to_vector<4>(original_results)); + original_results); + return matchSuccess(); + } +}; + +struct HloToLHloReduceConverter + : public OpConversionPattern { + public: + using OpConversionPattern::OpConversionPattern; + + PatternMatchResult matchAndRewrite( + xla_hlo::ReduceOp op, ArrayRef operands, + ConversionPatternRewriter& rewriter) const final { + auto loc = op.getLoc(); + // TODO(b/137624192) Implement variadic reduce. + if (op.getNumResults() != 1) return matchFailure(); + if (op.getParentRegion()->getBlocks().size() != 1) { + emitError(loc, + "tensor to buffer conversion expects a single block in the " + "region containing the operation"); + } + const auto& original_results = op.getResults(); + SmallVector buffer_args(operands.begin(), operands.end()); + for (auto result : original_results) { + buffer_args.push_back(GetBufferForResultValue(loc, result, &rewriter)); + } + auto new_op = rewriter.create( + loc, llvm::None, buffer_args, op.getAttrs()); + + // Copy over the operations inside the region. + rewriter.inlineRegionBefore(op.body(), new_op.body(), new_op.body().end()); + + // Create new block arguments with correct type. + auto& entry_block = new_op.body().front(); + int original_arg_count = entry_block.getNumArguments(); + for (int i = 0; i < original_arg_count; ++i) { + auto old_arg = entry_block.getArgument(i); + auto old_type = old_arg->getType().cast(); + auto new_type = + MemRefType::get(old_type.getShape(), old_type.getElementType()); + auto new_arg = entry_block.addArgument(new_type); + rewriter.replaceUsesOfBlockArgument(old_arg, new_arg); + } + // Add an argument for the result. + entry_block.addArgument( + entry_block.getArgument(original_arg_count)->getType()); + // Remove the old arguments. + for (int i = original_arg_count - 1; i >= 0; --i) { + entry_block.eraseArgument(i); + } + // Insert terminator at the end. 
+ rewriter.setInsertionPointToEnd(&entry_block); + rewriter.create(loc); + + rewriter.replaceOp(op, ArrayRef(buffer_args).slice(operands.size()), + original_results); + return matchSuccess(); } }; @@ -130,11 +193,12 @@ class HloToLhloTensorLoadConverter : public ConversionPattern { PatternMatchResult matchAndRewrite( Operation* op, ArrayRef operands, ConversionPatternRewriter& rewriter) const final { - rewriter.replaceOp(op, operands, llvm::to_vector<4>(op->getResults())); + rewriter.replaceOp(op, operands, op->getResults()); return matchSuccess(); } }; +// TODO(b/137624192): Rewrite into a copy and elide copy if possible. class HloToLhloTensorStoreConverter : public ConversionPattern { public: explicit HloToLhloTensorStoreConverter(MLIRContext* context) @@ -148,6 +212,19 @@ class HloToLhloTensorStoreConverter : public ConversionPattern { } }; +// TODO(b/137624192): Rewrite into a copy and elide copy if possible. +class HloToLhloReturnConverter : public OpConversionPattern { + public: + using OpConversionPattern::OpConversionPattern; + + PatternMatchResult matchAndRewrite( + xla_hlo::ReturnOp op, ArrayRef operands, + ConversionPatternRewriter& rewriter) const final { + rewriter.eraseOp(op); + return matchSuccess(); + } +}; + // Lowers from HLO dialect to LHLO dialect allocating/deallocating temporary // buffers if necessary. // @@ -215,6 +292,7 @@ void populateHLOToLHLOConversionPattern(MLIRContext* context, xla_lhlo::BroadcastInDimOp>, HloToLhloOpConverter, HloToLhloOpConverter, + HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, @@ -229,6 +307,7 @@ void populateHLOToLHLOConversionPattern(MLIRContext* context, HloToLhloOpConverter, HloToLhloOpConverter, HloToLhloOpConverter, + HloToLHloReduceConverter, HloToLhloReturnConverter, HloToLhloTensorLoadConverter, HloToLhloTensorStoreConverter >(context); // clang-format on diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_control_flow.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_control_flow.cc index 9be161851d9..8a8afc01bec 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_control_flow.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_control_flow.cc @@ -53,8 +53,7 @@ LogicalResult ReplaceTerminators(Region* region, Block* target_block, auto return_op = dyn_cast(block->getTerminator()); if (!return_op) continue; builder->setInsertionPointToEnd(block); - builder->create( - loc, target_block, llvm::to_vector<4>(return_op.getOperands())); + builder->create(loc, target_block, return_op.getOperands()); return_op.erase(); } @@ -196,8 +195,7 @@ LogicalResult LowerWhileOp(mlir::xla_hlo::WhileOp while_op) { dyn_cast(new_block->getTerminator()); if (!return_op) continue; builder.setInsertionPointToEnd(new_block); - builder.create(loc, cond_block, - llvm::to_vector<4>(return_op.getOperands())); + builder.create(loc, cond_block, return_op.getOperands()); return_op.erase(); } diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc index f0ba67e2fd5..02a9c7e69e0 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc @@ -127,8 +127,8 @@ static llvm::Optional GetIntegerHLOAxisFromTFAxis(Value *value, /// Returns a `ConvertOp` that casts the elements to a i64 type while retaining /// the shape of the input value. 
-static ConvertOp CastElementsToI64(Location loc, Value *value, - PatternRewriter *rewriter) { +static ConvertOp CastValueToI64(Location loc, Value *value, + PatternRewriter *rewriter) { return rewriter->create(loc, value, rewriter->getIntegerType(64)); } @@ -207,7 +207,8 @@ static IntegerAttr getFeatureDimensionAttr(Builder &b, StringAttr format, // Bias op utilities. //===----------------------------------------------------------------------===// -/// Return a 1D DenseIntElementsAttr for the feature dimension of a BiasAdd. +// Return a 1D DenseIntElementsAttr for the feature dimension of a BiasAdd. +// Requires input to have ranked tensor. static DenseIntElementsAttr getBiasFeatureDimension(Builder &b, StringAttr format, Value *input) { @@ -418,7 +419,8 @@ static DenseIntElementsAttr TFSliceSizes2HLOSliceSizes( Builder *builder) { DenseIntElementsAttr constant_start_indices; if (!matchPattern(start_indices, m_Constant(&constant_start_indices))) { - return slice_sizes; + return xla::ConvertElementsAttr(slice_sizes, builder->getIntegerType(64)) + .cast(); } auto input_ty = input->getType().dyn_cast(); @@ -687,7 +689,7 @@ class ConvertEinsumOp : public OpRewritePattern { rewriter.replaceOpWithNewOp( op, op.getType(), *op.inputs().begin(), equation); } else if (op.N() == 2) { - auto inputs = llvm::to_vector<2>(op.inputs()); + ValueRange inputs = op.inputs(); rewriter.replaceOpWithNewOp(op, op.getType(), inputs[0], inputs[1], equation); } else { @@ -924,7 +926,7 @@ class ConvertSizeOp : public OpRewritePattern { }; // Converts the tf.Split op into a series of HLO slice ops when the tensor to be -// split has fuly static shape and the dimension to split is a constant. +// split has fully static shape and the dimension to split is a constant. // // The main logic of this pattern is to calculate the index start and end range // for each slice. And this happens only on the dimension to be split; for all @@ -962,9 +964,9 @@ class ConvertSplitOp : public OpRewritePattern { PatternMatchResult matchAndRewrite(TF::SplitOp op, PatternRewriter &rewriter) const override { - // We can only match when the tensor to be split has fully static shape. + // We can only split along static dimensions. auto input_type = op.value()->getType().dyn_cast(); - if (!input_type || !input_type.hasStaticShape()) return matchFailure(); + if (!input_type) return matchFailure(); // We can only match when the split dimension is a constant scalar. DenseIntElementsAttr split_dim_attr; @@ -978,6 +980,10 @@ class ConvertSplitOp : public OpRewritePattern { // Calculate the dimension size for each slice along the split dimension. int64_t input_dim_size = input_type.getDimSize(dim_index); + // If we are splitting along the dynamic dimension then we cannot compute + // the static dimension length. + if (TensorType::isDynamic(input_dim_size)) return matchFailure(); + int64_t num_splits = op.getNumResults(); int64_t slice_size = input_dim_size / num_splits; @@ -1011,6 +1017,118 @@ class ConvertSplitOp : public OpRewritePattern { } }; +// Converts the tf.SplitV op into a series of HLO slice ops when the tensor to +// be split has fully static shape and the dimension to split and split sizes +// are constants. +// +// This is similar to the conversion for tf.Split op other than that the size of +// each chunk on the dimension to split is explicitly given as an op operand +// and they are not necessarily the same. 
+// +// For example, given the following IR: +// +// %split_sizes = "tf.Const"() {value = dense<[1, -1, 3]> : tensor<3xi32>} +// %split_dim = "tf.Const"() {value = dense<1> : tensor} +// %0:3 = "tf.SplitV"(%input, %split_sizes, %split_dim) : +// (tensor<4x6xf32>, tensor<3xi32>, tensor) -> +// (tensor<4x1xf32>, tensor<4x2xf32>, tensor<4x3xf32>) +// +// We will generate slices following slices: +// %0 = "xla_hlo.slice"(%input) { +// limit_indices = dense<[4, 1]> : tensor<2xi64>, +// start_indices = dense<0> : tensor<2xi64>, +// strides = dense<1> : tensor<2xi64>} : +// (tensor<4x6xf32>) -> tensor<4x1xf32> +// %1 = "xla_hlo.slice"(%input) { +// limit_indices = dense<[4, 3]> : tensor<2xi64>, +// start_indices = dense<[0, 1]> : tensor<2xi64>, +// strides = dense<1> : tensor<2xi64>} : +// (tensor<4x6xf32>) -> tensor<4x2xf32> +// %2 = "xla_hlo.slice"(%input) { +// limit_indices = dense<[4, 6]> : tensor<2xi64>, +// start_indices = dense<[0, 3]> : tensor<2xi64>, +// strides = dense<1> : tensor<2xi64>} : +// (tensor<4x6xf32>) -> tensor<4x3xf32> +class ConvertSplitVOp : public OpRewritePattern { + public: + using OpRewritePattern::OpRewritePattern; + + PatternMatchResult matchAndRewrite(TF::SplitVOp op, + PatternRewriter &rewriter) const override { + // We can only split along static dimensions. + // TODO(b/145731001): enhance to support dynamic-shaped inputs. + auto input_type = op.value()->getType().dyn_cast(); + if (!input_type) return matchFailure(); + + // We can only match when the split dimension is a constant scalar. + DenseIntElementsAttr split_dim_attr; + if (!matchPattern(op.split_dim(), m_Constant(&split_dim_attr))) + return matchFailure(); + + // We can only match when the split sizes is a constant int vector. + DenseIntElementsAttr split_sizes_attr; + if (!matchPattern(op.size_splits(), m_Constant(&split_sizes_attr))) + return matchFailure(); + + // Get each chunck's size along the dimension to split. It may contain + // dynamic sizes and we need to update it if so. + SmallVector split_sizes; + int64_t total_dim_size = 0; // Total dimension size assigned to splits + llvm::Optional dynamic_dim_index; + split_sizes.reserve( + split_sizes_attr.getType().cast().getNumElements()); + for (auto dim : llvm::enumerate(split_sizes_attr)) { + int64_t dim_val = dim.value().getSExtValue(); + split_sizes.push_back(dim_val); + if (dim_val == ShapedType::kDynamicSize) { + // We cannot have more than one dynamic dimension. + assert(!dynamic_dim_index && "invalid split sizes"); + dynamic_dim_index = dim.index(); + } else { + total_dim_size += dim_val; + } + } + + // Get the dimension we are splitting at. Offset properly if it's negative. + int64_t input_rank = input_type.getRank(); + int64_t dim_index = (*split_dim_attr.begin()).getSExtValue(); + if (dim_index < 0) dim_index += input_rank; + + int64_t input_dim_size = input_type.getDimSize(dim_index); + if (TensorType::isDynamic(input_dim_size)) return matchFailure(); + + assert(((dynamic_dim_index && total_dim_size <= input_dim_size) || + (!dynamic_dim_index && total_dim_size == input_dim_size)) && + "invalid split sizes"); + + // Update the dynamic dimension with calculated concrete size. + if (dynamic_dim_index) + split_sizes[*dynamic_dim_index] = input_dim_size - total_dim_size; + + // Parameters for constructing each slice. + SmallVector begin_indices(input_rank, 0); + auto end_indices = llvm::to_vector<4>(input_type.getShape()); + SmallVector strides(input_rank, 1); + + // All HLO slice results used to replace the original tf.Split op. 
+ SmallVector slices; + slices.reserve(op.getNumResults()); + + for (int i = 0; i < op.getNumResults(); ++i) { + end_indices[dim_index] = begin_indices[dim_index] + split_sizes[i]; + slices.push_back(rewriter.create( + op.getLoc(), op.value(), GetI64ElementsAttr(begin_indices, &rewriter), + GetI64ElementsAttr(end_indices, &rewriter), + GetI64ElementsAttr(strides, &rewriter))); + // Prepare the begin indice for the next slice. + begin_indices[dim_index] = end_indices[dim_index]; + } + + rewriter.replaceOp(op, slices); + return matchSuccess(); + } +}; + // Converts StridedSlice op to HLO Slice op along with Reverse op to handle // negative strides and Reshape op to update the output shape. Indices and // strides operands are converted to attributes with non-negative indexing. @@ -1182,8 +1300,7 @@ class GenericConvertReductionOp : public OpRewritePattern { ArrayRef input_shape = input_ty.getShape(); DenseIntElementsAttr dimensions; - if (!matchPattern(op.reduction_indices(), m_Constant(&dimensions)) || - dimensions.getType().getRank() != 1) + if (!matchPattern(op.reduction_indices(), m_Constant(&dimensions))) return this->matchFailure(); // Build the final shape from input_shape and dimensions using a bitmap @@ -1260,7 +1377,6 @@ class ConvertMeanOp : public GenericConvertReductionOp { public: using GenericConvertReductionOp::GenericConvertReductionOp; - static Value *GetInitialValue(Type reduce_element_type, Location loc, PatternRewriter &rewriter) { return GetScalarConstOfType(reduce_element_type, loc, 0, &rewriter); @@ -1300,6 +1416,36 @@ class ConvertMaxOp } }; +// Converts All op to HLO Reduce op. +// +// %init = constant dense<...> : tensor +// %max = "xla_hlo.reduce"(%inp, %init) ["xla_hlo.and"] +// {dimensions = ...} +class ConvertAllOp + : public GenericConvertReductionOp { + public: + using GenericConvertReductionOp::GenericConvertReductionOp; + static Value *GetInitialValue(Type reduce_element_type, Location loc, + PatternRewriter &rewriter) { + return GetScalarConstOfType(reduce_element_type, loc, 1, &rewriter); + } +}; + +// Converts Any op to HLO Reduce op. +// +// %init = constant dense<...> : tensor +// %max = "xla_hlo.reduce"(%inp, %init) ["xla_hlo.or"] +// {dimensions = ...} +class ConvertAnyOp + : public GenericConvertReductionOp { + public: + using GenericConvertReductionOp::GenericConvertReductionOp; + static Value *GetInitialValue(Type reduce_element_type, Location loc, + PatternRewriter &rewriter) { + return GetScalarConstOfType(reduce_element_type, loc, 0, &rewriter); + } +}; + // Converts tensorflow ArgMin or ArgMax op to xla_hlo operations that perform // a reduction on the original input and the corresponding index. The reduction // sub-computation selects the max (or min) value and the index for the value. @@ -2000,6 +2146,53 @@ class ConvertTopKV2Op : public OpRewritePattern { } }; +// Converts tf.Unpack to a series of XLA HLO slice ops. +// +// Each slice takes one element along the dimension to unpack and takes the full +// range for all other dimenions. Each slice is then reshaped to drop the +// dimension to unpack (which is always of size 1). +// TODO(antiagainst): consider changing this into a TF internal lowering pass. 
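// As a concrete illustration (shapes mirroring the legalize-tf.mlir test in
// this change, not additional generated code): unpacking a tensor<4x3x6xf32>
// along axis = 1 produces, for i in {0, 1, 2}, pairs of ops of the form
//
//   %slice_i = "xla_hlo.slice"(%input)
//       {start_indices = dense<[0, i, 0]>, limit_indices = dense<[4, i+1, 6]>,
//        strides = dense<1>} : (tensor<4x3x6xf32>) -> tensor<4x1x6xf32>
//   %res_i = "xla_hlo.reshape"(%slice_i) : (tensor<4x1x6xf32>) -> tensor<4x6xf32>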
+class ConvertUnpackOp : public OpRewritePattern { + public: + using OpRewritePattern::OpRewritePattern; + + PatternMatchResult matchAndRewrite(TF::UnpackOp op, + PatternRewriter &rewriter) const override { + auto value_type = op.value()->getType().cast(); + if (!value_type) return matchFailure(); + + int64_t value_rank = value_type.getRank(); + int64_t axis = op.axis().getSExtValue(); + if (axis < 0) axis += value_rank; + + // Parameters for constructing each slice. + SmallVector begin_indices(value_rank, 0); + auto end_indices = llvm::to_vector<4>(value_type.getShape()); + SmallVector strides(value_rank, 1); + + // All HLO slice+reshape results used to replace the original tf.Unpack op. + SmallVector results; + results.reserve(op.getNumResults()); + + for (int i = 0; i < op.getNumResults(); ++i) { + begin_indices[axis] = i; + end_indices[axis] = i + 1; + + auto slice_op = rewriter.create( + op.getLoc(), op.value(), GetI64ElementsAttr(begin_indices, &rewriter), + GetI64ElementsAttr(end_indices, &rewriter), + GetI64ElementsAttr(strides, &rewriter)); + // Reshape to drop the axis dimension. + auto reshape_op = rewriter.create( + op.getLoc(), op.getType(i), slice_op); + results.push_back(reshape_op); + } + + rewriter.replaceOp(op, results); + return matchSuccess(); + } +}; + #include "tensorflow/compiler/mlir/xla/transforms/generated_legalize_tf.inc" LogicalResult legalizeTF(Operation *op, bool allow_partial_conversion) { @@ -2013,16 +2206,16 @@ LogicalResult legalizeTF(Operation *op, bool allow_partial_conversion) { // level TensorFlow ops. So, we don't have to target all the TensorFlow ops // here for lowering to HLO. TF::PopulateLoweringTFPatterns(context, &patterns); - patterns - .insert, - ConvertSoftmaxOp, ConvertSplitOp, - ConvertStridedSliceOp, ConvertTopKV2Op, ConvertMeanOp, - ConvertSumOp, ConvertMaxOp, ConvertTileOp, ConvertMaxPoolGradOp, - ConvertOneHotOp, ConvertConv2DBackpropInputOp, - ConvertConv2DBackpropFilterOp>(op->getContext()); + patterns.insert< + ConvertArgMaxOp, ConvertBF16FloorDivOp, ConvertConv2D, ConvertEinsumOp, + ConvertMaxPoolOp, ConvertRangeOp, ConvertSigmoidOp, ConvertSizeOp, + ConvertMaxPoolOp, ConvertRangeOp, ConvertSigmoidOp, + ConvertSoftmaxOp, + ConvertSoftmaxOp, ConvertSplitOp, ConvertSplitVOp, + ConvertStridedSliceOp, ConvertTopKV2Op, ConvertUnpackOp, ConvertMeanOp, + ConvertSumOp, ConvertMaxOp, ConvertAllOp, ConvertAnyOp, ConvertTileOp, + ConvertMaxPoolGradOp, ConvertOneHotOp, ConvertConv2DBackpropInputOp, + ConvertConv2DBackpropFilterOp>(op->getContext()); ConversionTarget target(*context); target.addLegalDialect(); diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_control_flow.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_control_flow.cc index a794e274f59..d2177041ba7 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_control_flow.cc +++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_control_flow.cc @@ -95,8 +95,7 @@ void ImportXlaRegion(mlir::FuncOp func, Region* dest_region, Location loc, detupled_args.push_back(extract); } - llvm::SmallVector result( - builder.create(loc, func, detupled_args).getResults()); + auto result = builder.create(loc, func, detupled_args).getResults(); if (!tuple_return) { builder.create(loc, result); } else { diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td index fb8c6736309..14075134f11 100644 --- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td +++ 
b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td @@ -29,6 +29,9 @@ def FeatureDimension : NativeCodeCall< def FalseBoolAttr : AttrConstraint>; def TrueBoolAttr : AttrConstraint>; +def CastValueToI64: NativeCodeCall< + "CastValueToI64($0->getLoc(), $1, &$_builder)">; + def : Pattern< (TF_FusedBatchNormOp:$root $x, $scale, $offset, $mean, $variance, $epsilon, $data_format, FalseBoolAttr:$is_training), @@ -43,13 +46,22 @@ def : Pattern< [(HasNoUseOf:$root__1), (HasNoUseOf:$root__2), (HasNoUseOf:$root__3), (HasNoUseOf:$root__4)]>; +//===----------------------------------------------------------------------===// +// Assert op pattern. +//===----------------------------------------------------------------------===// + +// HLO and XLA doesn't support Assertions. +def LowerAssert : Pattern<(TF_AssertOp $condition, $data, $summarize), []>; + //===----------------------------------------------------------------------===// // Bias op patterns. //===----------------------------------------------------------------------===// def BiasAddFeatureDimension : NativeCodeCall< "getBiasFeatureDimension($_builder, $0, $1)">; -def : Pat<(TF_BiasAddOp AnyStaticShapeTensor:$input, $bias, $data_format), +// $input needs to be a ranked tensor to identify index of the feature +// dimension depending on the data_format 'NHWC' or 'NCHW'. +def : Pat<(TF_BiasAddOp AnyRankedTensor:$input, $bias, $data_format), (HLO_AddOp $input, $bias, (BiasAddFeatureDimension $data_format, $input))>; @@ -298,7 +310,7 @@ def : Pat<(TF_MatMulOp $a, $b, $transpose_a, $transpose_b), //===----------------------------------------------------------------------===// def : Pat<(TF_ConstOp:$res ElementsAttr:$value), (HLO_ConstOp $value), - [(AnyStaticShapeTensor $res), (HLO_Tensor $res)]>; + [(HLO_Tensor $res)]>; //===----------------------------------------------------------------------===// // Relu op patterns. @@ -316,11 +328,21 @@ def : Pat<(TF_Relu6Op AnyStaticShapeTensor:$input), (HLO_ConstOp (ConstantSplat<"6"> $input)))>; // ReluGrad(gradients, features) = gradients * (features > 0) -def : Pat<(TF_ReluGradOp AnyStaticShapeTensor:$gradients, AnyStaticShapeTensor:$features), +// +// $gradients needs to be of static shape so that on_true and on_false operands +// of SelectOp have same shape. +// +// $features needs to be ranked for computation of the broadcast dimensions for +// CompareOp. +// +// TODO(hinsu): Relax $gradients static shape requirement when there is a way +// to create splat tensor of dynamic shape in HLO. +def : Pat<(TF_ReluGradOp AnyStaticShapeTensor:$gradients, AnyRankedTensor:$features), (HLO_SelectOp - (HLO_CompareOp $features, (HLO_ConstOp:$zero (ConstantSplat<"0"> $features)), + (HLO_CompareOp $features, + (HLO_ConstOp (GetScalarOfType<0> $features)), (NullDenseIntElementsAttr), HLO_COMPARISON_DIRECTION_GT), - $gradients, $zero)>; + $gradients, (HLO_ConstOp (ConstantSplat<"0"> $gradients)))>; //===----------------------------------------------------------------------===// // Slice op patterns. 
@@ -333,9 +355,9 @@ def TFSliceSizes2HLOSliceSizes : NativeCodeCall< "TFSliceSizes2HLOSliceSizes($0, $1, $2.cast()," "&$_builder)">; -def : Pat<(TF_SliceOp HLO_Tensor:$input, HLO_Tensor:$starting_indices, - (TF_ConstOp I64ElementsAttr:$slice_sizes)), - (HLO_DynamicSliceOp $input, $starting_indices, +def : Pat<(TF_SliceOp:$op HLO_Tensor:$input, HLO_Tensor:$starting_indices, + (TF_ConstOp $slice_sizes)), + (HLO_DynamicSliceOp $input, (CastValueToI64 $op, $starting_indices), (TFSliceSizes2HLOSliceSizes $input, $starting_indices, $slice_sizes)), [(CanBeTranslatedToDynamicSlice $input, $starting_indices, $slice_sizes)]>; @@ -383,19 +405,21 @@ foreach Mapping = [ def : Pat<(TF_CastOp HLO_Tensor:$arg, ConstBoolAttrFalse), (HLO_ConvertOp $arg)>; -def : Pat<(TF_TransposeOp:$res $arg, (TF_ConstOp I64ElementsAttr:$permutation)), - (HLO_TransposeOp $arg, (CastIntElementsAttr $permutation))>; +def : Pat<(TF_TransposeOp:$res $arg, (TF_ConstOp $permutation)), + (HLO_TransposeOp $arg, (CastElementsToI64Elements $permutation))>; +// Result of the following ops changing tensor shape needs to have static +// shape as HLO doesn't yet support dynamic reshaping ops. +// +// TODO(hinsu): Update once HLO supports dynamic reshaping ops. foreach TfOp = [TF_ExpandDimsOp, TF_ReshapeOp, TF_SqueezeOp, ] in { - def : Pat<(TfOp:$res AnyStaticShapeTensor:$arg, $ignored), + def : Pat<(TfOp:$res $arg, $ignored), (HLO_ReshapeOp $arg), [(AnyStaticShapeTensor $res)]>; } //===----------------------------------------------------------------------===// // RngUniform. //===----------------------------------------------------------------------===// -def CastElementsToI64: NativeCodeCall< - "CastElementsToI64($0->getLoc(), $1, &$_builder)">; // TODO(misard,phawkins): handle random number generator seeds/states correctly. 
def : Pat<(TF_RandomUniformOp:$old $shape, $seed, $seed2), @@ -404,5 +428,5 @@ def : Pat<(TF_RandomUniformOp:$old $shape, $seed, $seed2), (NativeCodeCall<"$_builder.getFloatAttr(old.dtype(), 0.0)">)), (HLO_ConstOp (NativeCodeCall<"$_builder.getFloatAttr(old.dtype(), 1.0)">)), - (CastElementsToI64 $old, $shape)), + (CastValueToI64 $old, $shape)), [(IsShapedTensor $shape)]>; diff --git a/tensorflow/compiler/mlir/xla/transforms/lhlo_fuse_linalg.cc b/tensorflow/compiler/mlir/xla/transforms/lhlo_fuse_linalg.cc index 4dabe0dea42..928bfc20cdb 100644 --- a/tensorflow/compiler/mlir/xla/transforms/lhlo_fuse_linalg.cc +++ b/tensorflow/compiler/mlir/xla/transforms/lhlo_fuse_linalg.cc @@ -54,7 +54,8 @@ struct LhloFuseLinalg : public FunctionPass { auto op = cast(generic_op.getOperation()); for (const Value* result : op.getOutputs()) { if (!func_args.count(result)) continue; - if (linalg::tileLinalgOp(b, op, tile_sizes, &folder)) { + if (linalg::tileLinalgOp(b, op, tile_sizes, /*permutation=*/{}, + &folder)) { generic_op.erase(); return; } diff --git a/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_linalg.cc b/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_linalg.cc index 28bacfa87f0..c4787d9bfd9 100644 --- a/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_linalg.cc +++ b/tensorflow/compiler/mlir/xla/transforms/lhlo_legalize_to_linalg.cc @@ -112,7 +112,7 @@ class PointwiseToLinalgConverter : public OpConversionPattern { rewriter.setInsertionPointToEnd(block); Operation* op = MapLhloOpToStdScalarOp( llvm::cast(lhlo_op), bodyResultTypes, bodyArgs, rewriter); - rewriter.create(loc, llvm::to_vector<1>(op->getResults())); + rewriter.create(loc, op->getResults()); rewriter.eraseOp(lhlo_op); return ConversionPattern::matchSuccess(); } diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index bfd0ce3d072..4d85ca67777 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -653,7 +653,13 @@ class BinaryOpsTest(xla_test.XLATestCase): divs = np.arange(-3, 3, .25, dtype=dtype).reshape(1, 24) np_result = np.true_divide(nums, divs) np_result[:, divs[0] == 0] = 0 - self._testBinary(gen_math_ops.div_no_nan, nums, divs, expected=np_result) + self._testBinary( + gen_math_ops.div_no_nan, + nums, + divs, + expected=np_result, + rtol=7e-15 if dtype == np.float64 else None, + atol=3.9e-15 if dtype == np.float64 else None) if dtype not in self.complex_types: # floordiv unsupported for complex. self._testBinary( diff --git a/tensorflow/compiler/tests/tensor_array_ops_test.py b/tensorflow/compiler/tests/tensor_array_ops_test.py index 99847e84c28..1bc88509542 100644 --- a/tensorflow/compiler/tests/tensor_array_ops_test.py +++ b/tensorflow/compiler/tests/tensor_array_ops_test.py @@ -164,7 +164,8 @@ class TensorArrayTest(xla_test.XLATestCase): dtype=tf_dtype, tensor_array_name="foo", size=3) # Unpack a matrix into vectors. 
- w1 = ta.unstack(convert([[1.0, 1.1], [2.0, 2.1], [3.0, 3.1]])) + w1 = ta.unstack( + convert([[1.0, 1.03125], [2.0, 2.03125], [3.0, 3.03125]])) r0 = w1.read(0) r1 = w1.read(1) r2 = w1.read(2) @@ -172,9 +173,9 @@ class TensorArrayTest(xla_test.XLATestCase): d0, d1, d2 = self.evaluate(xla.compile(fn)) - self.assertAllEqual(convert([1.0, 1.1]), d0) - self.assertAllEqual(convert([2.0, 2.1]), d1) - self.assertAllEqual(convert([3.0, 3.1]), d2) + self.assertAllEqual(convert([1.0, 1.03125]), d0) + self.assertAllEqual(convert([2.0, 2.03125]), d1) + self.assertAllEqual(convert([3.0, 3.03125]), d2) def fn(): # Reset ta because we're going to change the shape, else shape diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc index d011be2c5af..20804af5229 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc @@ -307,7 +307,7 @@ void UpdateToEngineNode(const std::vector& infos, } } } - LOG(FATAL) << "Node " << (**node).name() << " not found in any engine."; + LOG(FATAL) << "Node " << node_name << " not found in any engine."; } // Function to insert a TRT engine node into the graph. diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc index 6c2b8fdc091..ef03ab91714 100644 --- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc +++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc @@ -654,9 +654,8 @@ class ConverterTest : public ::testing::Test { ConverterTest() { Reset(); } void Reset() { - builder_.reset(nvinfer1::createInferBuilder(logger_)); converter_ = - std::move(Converter::Create(builder_.get(), TrtPrecisionMode::FP32, + std::move(Converter::Create(TrtPrecisionMode::FP32, /*use_calibration=*/false, &logger_) .ValueOrDie()); weight_store_ = &converter_->weight_store_; @@ -702,9 +701,6 @@ class ConverterTest : public ::testing::Test { private: Logger logger_; - // These members are ordered in a way such that the destruction order is: - // converter_ -> builder_ - TrtUniquePtrType builder_; protected: std::unique_ptr converter_; @@ -996,9 +992,7 @@ TEST_F(ConverterTest, MaybeApplyQuantizationRanges) { FakeITensor input, infer_1, infer_2, infer_3; FakeITensor not_infer; Logger logger; - TrtUniquePtrType builder( - nvinfer1::createInferBuilder(logger)); - auto int8_converter = Converter::Create(builder.get(), TrtPrecisionMode::INT8, + auto int8_converter = Converter::Create(TrtPrecisionMode::INT8, /*use_calibration=*/true, &logger) .ValueOrDie(); int8_converter->ProvideQuantizationRange(&input, -5.0f, 5.0f); @@ -1255,12 +1249,8 @@ class OpConverterTest : public ::testing::Test { engine_.reset(nullptr); // Re-create them in proper order. - builder_.reset(nvinfer1::createInferBuilder(logger_)); - builder_->setMaxWorkspaceSize(1 << 26); - - // Reset the converter. converter_ = - std::move(Converter::Create(builder_.get(), precision_mode_to_test_, + std::move(Converter::Create(precision_mode_to_test_, /*use_calibration=*/false, &logger_) .ValueOrDie()); @@ -1294,18 +1284,13 @@ class OpConverterTest : public ::testing::Test { TF_EXPECT_OK(converter_->RenameAndMarkOutputTensors(output_info)); // Build the TRT engine. 
- if (precision_mode == TrtPrecisionMode::FP16) { - builder_->setFp16Mode(true); - } else if (precision_mode == TrtPrecisionMode::INT8) { - // Setting FP16 mode as well allows TRT to also consider FP16 kernels and - // use them in situations where they are faster than INT8 or where INT8 is - // not supported for a given layer. - builder_->setFp16Mode(true); - builder_->setInt8Mode(true); - } ASSERT_EQ(nullptr, engine_.get()); - builder_->setMaxBatchSize(batch_size); - TF_ASSERT_OK(converter_->BuildCudaEngine(&engine_)); + TF_ASSERT_OK( + converter_->BuildCudaEngine(&engine_, + /*max_batch_size=*/batch_size, + /*max_workspace_size_bytes=*/1 << 26, + /*allocator=*/nullptr, + /*calibrator=*/nullptr)); CHECK_NOTNULL(engine_.get()); CheckDataTypeMatches(input_data); CheckDataTypeMatches(*output_data); @@ -1473,7 +1458,6 @@ class OpConverterTest : public ::testing::Test { private: Logger logger_; - TrtUniquePtrType builder_; TrtUniquePtrType engine_; cudaStream_t stream_; // Used to create placeholders with shape and data type information. The diff --git a/tensorflow/compiler/tf2xla/kernels/data_format_ops.cc b/tensorflow/compiler/tf2xla/kernels/data_format_ops.cc index fea2407a5d1..fb89742b139 100644 --- a/tensorflow/compiler/tf2xla/kernels/data_format_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/data_format_ops.cc @@ -143,6 +143,10 @@ class DataFormatVecPermuteOp : public XlaOpKernel { REGISTER_XLA_OP( Name("DataFormatVecPermute").TypeConstraint("T", {DT_INT32, DT_INT64}), DataFormatVecPermuteOp); +REGISTER_XLA_OP(Name("DataFormatVecPermute") + .Label("host") + .TypeConstraint("T", {DT_INT32, DT_INT64}), + DataFormatVecPermuteOp); } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc index 9d10be1d90a..defd96b570c 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler.cc @@ -723,17 +723,6 @@ Status XlaCompiler::CompileFunction( std::unique_ptr graph = GetGraph(fbody); - // Clear the "_kernel" attribute if it is set to "host". This is used to - // indicate that a computation should happen on the host instead of the - // accelerator, but doesn't make sense in XLA. - const char* const kKernelAttr = "_kernel"; - for (Node* n : graph->nodes()) { - string value; - if (TryGetNodeAttr(n->attrs(), kKernelAttr, &value) && value == "host") { - n->ClearAttr(kKernelAttr); - } - } - // _Arg and _Retval nodes don't exist in the stored subgraph for the function; // they are added by the function body looked up. Therefore, they don't have // core assignments here. @@ -1059,7 +1048,12 @@ Status XlaCompiler::BuildArguments( const XlaCompiler::Argument& arg = args[input_to_args->at(i)]; VLOG(2) << " XLA arg " << i << " shape: " << xla::ShapeUtil::HumanString(arg_shapes[i]) - << " name: " << arg.name << " TF arg " << input_to_args->at(i); + << " name: " << arg.name << " TF arg " << input_to_args->at(i) + << " node name: " << arg.node_name + << (arg_shardings.find(i) == arg_shardings.end() + ? 
"" + : absl::StrCat(" sharding: ", + arg_shardings.at(i).DebugString())); XlaExpression& arg_expression = (*arg_expressions)[input_to_args->at(i)]; switch (arg.kind) { case XlaCompiler::Argument::kResource: { diff --git a/tensorflow/compiler/tf2xla/xla_compiler.h b/tensorflow/compiler/tf2xla/xla_compiler.h index c3e9b3edeca..670da043c1a 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.h +++ b/tensorflow/compiler/tf2xla/xla_compiler.h @@ -147,6 +147,9 @@ class XlaCompiler { // The name of this argument, used for debugging. string name; + // The name of TensorFlow _Arg node, used for debugging. + string node_name; + // For a kResource, what kind of resource is it? XlaResource::Kind resource_kind = XlaResource::kInvalid; diff --git a/tensorflow/compiler/tf2xla/xla_op_registry.cc b/tensorflow/compiler/tf2xla/xla_op_registry.cc index e70012f761a..a43608bd434 100644 --- a/tensorflow/compiler/tf2xla/xla_op_registry.cc +++ b/tensorflow/compiler/tf2xla/xla_op_registry.cc @@ -61,6 +61,7 @@ XlaOpRegistry::~XlaOpRegistry() = default; /* static */ bool XlaOpRegistry::IsCompatible(const OpRegistration& x, const OpRegistration& y) { if (x.name != y.name) return true; + if (x.label != y.label) return true; // The registrations refer to the same Op: ensures they are compatible and // are restricted to different device whitelists. if (x.compilation_only != y.compilation_only) { @@ -256,6 +257,7 @@ void XlaOpRegistry::RegisterCompilationKernels() { std::unique_ptr kdef(new KernelDef); kdef->set_op(op_registration->name); kdef->set_device_type(backend.first); + kdef->set_label(op_registration->label); // Constrain each type attribute to the intersection of: // a) the types supported by the backend, and @@ -539,6 +541,11 @@ XlaOpRegistrationBuilder& XlaOpRegistrationBuilder::IsMetadataOp() { return *this; } +XlaOpRegistrationBuilder& XlaOpRegistrationBuilder::Label(std::string label) { + registration_->label = label; + return *this; +} + std::unique_ptr XlaOpRegistrationBuilder::Build( XlaOpRegistry::Factory factory) { registration_->factory = factory; diff --git a/tensorflow/compiler/tf2xla/xla_op_registry.h b/tensorflow/compiler/tf2xla/xla_op_registry.h index af08790e02e..c6f6ffb2853 100644 --- a/tensorflow/compiler/tf2xla/xla_op_registry.h +++ b/tensorflow/compiler/tf2xla/xla_op_registry.h @@ -270,6 +270,8 @@ class XlaOpRegistry { // operands and not their values. bool is_metadata_op = false; + std::string label; + // Factory used to build OpKernels that perform symbolic execution. Factory factory; }; @@ -350,6 +352,9 @@ class XlaOpRegistrationBuilder { // operands and not their values. XlaOpRegistrationBuilder& IsMetadataOp(); + // Specifies a particular value for the "_kernel" attr. + XlaOpRegistrationBuilder& Label(std::string label); + std::unique_ptr Build( XlaOpRegistry::Factory factory); diff --git a/tensorflow/compiler/xla/client/lib/math.cc b/tensorflow/compiler/xla/client/lib/math.cc index 989968b5cbc..8c85482c8f8 100644 --- a/tensorflow/compiler/xla/client/lib/math.cc +++ b/tensorflow/compiler/xla/client/lib/math.cc @@ -319,6 +319,8 @@ XlaOp Erf(XlaOp x) { }); } +namespace { + // Approximation for the inverse error function from // Giles, M., "Approximating the erfinv function". 
// The approximation has the form: @@ -331,7 +333,7 @@ XlaOp Erf(XlaOp x) { // p = sum_{i=1}^n gq[i]*w^i // } // return p*x -XlaOp ErfInv(XlaOp x) { +XlaOp ErfInv32(XlaOp x) { constexpr int kDegree = 9; constexpr std::array w_less_than_5_constants = { 2.81022636e-08f, 3.43273939e-07f, -3.5233877e-06f, @@ -371,6 +373,101 @@ XlaOp ErfInv(XlaOp x) { }); } +XlaOp ErfInv64(XlaOp x) { + constexpr std::array w_less_than_6_25_constants = { + -3.6444120640178196996e-21, -1.685059138182016589e-19, + 1.2858480715256400167e-18, 1.115787767802518096e-17, + -1.333171662854620906e-16, 2.0972767875968561637e-17, + 6.6376381343583238325e-15, -4.0545662729752068639e-14, + -8.1519341976054721522e-14, 2.6335093153082322977e-12, + -1.2975133253453532498e-11, -5.4154120542946279317e-11, + 1.051212273321532285e-09, -4.1126339803469836976e-09, + -2.9070369957882005086e-08, 4.2347877827932403518e-07, + -1.3654692000834678645e-06, -1.3882523362786468719e-05, + 0.0001867342080340571352, -0.00074070253416626697512, + -0.0060336708714301490533, 0.24015818242558961693, + 1.6536545626831027356}; + constexpr std::array w_less_than_16_constants = { + 2.2137376921775787049e-09, 9.0756561938885390979e-08, + -2.7517406297064545428e-07, 1.8239629214389227755e-08, + 1.5027403968909827627e-06, -4.013867526981545969e-06, + 2.9234449089955446044e-06, 1.2475304481671778723e-05, + -4.7318229009055733981e-05, 6.8284851459573175448e-05, + 2.4031110387097893999e-05, -0.0003550375203628474796, + 0.00095328937973738049703, -0.0016882755560235047313, + 0.0024914420961078508066, -0.0037512085075692412107, + 0.005370914553590063617, 1.0052589676941592334, + 3.0838856104922207635, + }; + constexpr std::array w_greater_than_16_constants = { + -2.7109920616438573243e-11, -2.5556418169965252055e-10, + 1.5076572693500548083e-09, -3.7894654401267369937e-09, + 7.6157012080783393804e-09, -1.4960026627149240478e-08, + 2.9147953450901080826e-08, -6.7711997758452339498e-08, + 2.2900482228026654717e-07, -9.9298272942317002539e-07, + 4.5260625972231537039e-06, -1.9681778105531670567e-05, + 7.5995277030017761139e-05, -0.00021503011930044477347, + -0.00013871931833623122026, 1.0103004648645343977, + 4.8499064014085844221, + }; + // Compute logarithm of (1+arg) using log1p(arg) which is more precise than + // log(1+arg) when arg is close to zero. For more details, see + // https://en.cppreference.com/w/cpp/numeric/math/log1p + auto w = -Log1p(-x * x); + + auto lt_6_25 = Lt(w, ScalarLike(x, 6.25)); + auto lt_16 = Lt(w, ScalarLike(x, 16)); + auto coefficient = [&](int i) { + auto c = FullLike(x, w_less_than_6_25_constants[i]); + if (i < 19) { + c = Select(lt_6_25, c, FullLike(x, w_less_than_16_constants[i])); + } + if (i < 17) { + c = Select(lt_16, c, FullLike(x, w_greater_than_16_constants[i])); + } + return c; + }; + auto sqrt_w = Sqrt(w); + w = Select(lt_6_25, w - ScalarLike(x, 3.125), + sqrt_w - Select(lt_16, ScalarLike(x, 3.25), ScalarLike(x, 5.0))); + auto p = coefficient(0); + for (int i = 1; i < 17; ++i) { + p = coefficient(i) + p * w; + } + for (int i = 17; i < 19; ++i) { + p = Select(lt_16, coefficient(i) + p * w, p); + } + for (int i = 19; i < 23; ++i) { + p = Select(lt_6_25, coefficient(i) + p * w, p); + } + // Result modulo edge cases. + XlaOp result = p * x; + + // Handle edge cases, namely erfinv(+/-1) = +/-inf. (The above computation is + // indeterminate, and can give nan or -/+inf.) 
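Two numerical details of the double-precision path above are worth calling out: w is formed as -log1p(-x*x) rather than -log((1-x)(1+x)) for accuracy near zero, and erfinv(+/-1) is pinned to +/-inf because the fitted polynomials are indeterminate there. A self-contained reference sketch using Newton's method on std::erf, handy for sanity-checking the polynomial approximation; the initial guess is an arbitrary assumption and this is not the XLA implementation:

#include <cmath>
#include <iostream>
#include <limits>

// Invert erf by Newton iteration on f(t) = erf(t) - y.
double ErfInvReference(double y) {
  if (std::abs(y) == 1.0) {
    return y * std::numeric_limits<double>::infinity();  // erfinv(+/-1) = +/-inf
  }
  const double w = -std::log1p(-y * y);  // same variable the polynomials use
  const double sqrt_pi = std::sqrt(std::acos(-1.0));
  double t = y * std::sqrt(w);  // rough initial guess (assumption)
  for (int i = 0; i < 60; ++i) {
    const double err = std::erf(t) - y;
    const double deriv = 2.0 / sqrt_pi * std::exp(-t * t);
    t -= err / deriv;
  }
  return t;
}

int main() {
  std::cout << ErfInvReference(0.5) << "\n";  // ~0.476936, matches the ErfInvF64 test data
  std::cout << ErfInvReference(1.0) << "\n";  // inf
}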
+ auto& b = *x.builder(); + return b.ReportErrorOrReturn([&]() -> StatusOr { + TF_ASSIGN_OR_RETURN(Shape shape, b.GetShape(x)); + return Select(Eq(Abs(x), ScalarLike(x, 1)), + x * MaxValue(&b, shape.element_type()), result); + }); +} + +} // namespace + +XlaOp ErfInv(XlaOp x) { + auto& b = *x.builder(); + return b.ReportErrorOrReturn([&]() -> StatusOr { + TF_RETURN_IF_ERROR(EnsureOperandIsRealFp("ErfInv", x)); + TF_ASSIGN_OR_RETURN(auto shape, b.GetShape(x)); + if (shape.element_type() == F64) { + return ErfInv64(x); + } + return DoWithUpcastToF32(x, {BF16, F16}, + [](XlaOp x) { return ErfInv32(x); }); + }); +} + namespace { // Coefficients for the Lanczos approximation of the gamma function. The // coefficients are uniquely determined by the choice of g and n (kLanczosGamma diff --git a/tensorflow/compiler/xla/client/lib/math_test.cc b/tensorflow/compiler/xla/client/lib/math_test.cc index 6415e9383b5..8d13922e0e3 100644 --- a/tensorflow/compiler/xla/client/lib/math_test.cc +++ b/tensorflow/compiler/xla/client/lib/math_test.cc @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/compiler/xla/client/lib/math.h" + #include "tensorflow/compiler/xla/client/lib/constants.h" #include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/literal_util.h" @@ -116,6 +117,10 @@ class MathTypedTest : public MathTest { // // For good measure, we also check pow with an exponent other than 0.5. void TestSqrtPowInequivalence() { + // TODO(b/145798892): test fails on GPU for double values. + if (std::is_same::value) { + return; + } SetFastMathDisabled(true); // Tests disable constant folding by default, but this test needs it @@ -151,11 +156,16 @@ class MathTypedTest : public MathTest { }; // TODO(b/123355973): Add bfloat16 to TestTypes once it's working. 
-#ifdef XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT16 -using TestTypes = ::testing::Types; -#else -using TestTypes = ::testing::Types; +using TestTypes = ::testing::Types; TYPED_TEST_CASE(MathTypedTest, TestTypes); @@ -224,6 +234,28 @@ XLA_TEST_F(MathTest, SqrtF32) { ComputeAndCompareR0(&builder, 0.0f, {zero_data.get()}, error_spec_); } +#ifndef XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT64 +XLA_TEST_F(MathTest, ErfInvF64) { + XlaBuilder builder(TestName()); + auto x = ConstantR1( + &builder, {-0.9, -0.8, -0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.1, 0.0, 0.1, + 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9}); + ErfInv(x); + + std::vector expected = {-1.163087153676674, -0.9061938024368231, + -0.732869077959217, -0.5951160814499948, + -0.4769362762044698, -0.37080715859355795, + -0.27246271472675443, -0.1791434546212916, + -0.08885599049425767, 0., + 0.08885599049425777, 0.1791434546212916, + 0.27246271472675443, 0.37080715859355784, + 0.4769362762044698, 0.5951160814499948, + 0.732869077959217, 0.9061938024368231, + 1.1630871536766736}; + ComputeAndCompareR1(&builder, expected, {}, ErrorSpec{1e-15}); +} +#endif + XLA_TEST_F(MathTest, SquareTenValues) { XlaBuilder builder(TestName()); auto x = ConstantR1( diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index 8f480c0dec3..290b9c0f647 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -2112,7 +2112,8 @@ XlaOp XlaBuilder::CrossReplicaSum( XlaOp XlaBuilder::AllReduce(XlaOp operand, const XlaComputation& computation, absl::Span replica_groups, - const absl::optional& channel_id) { + const absl::optional& channel_id, + const absl::optional& shape_with_layout) { return ReportErrorOrReturn([&]() -> StatusOr { HloInstructionProto instr; TF_ASSIGN_OR_RETURN(const Shape* operand_shape, GetShapePtr(operand)); @@ -2136,9 +2137,31 @@ XlaOp XlaBuilder::AllReduce(XlaOp operand, const XlaComputation& computation, operand_shapes.push_back(operand_shape); operands.push_back(operand); } - TF_ASSIGN_OR_RETURN(Shape shape, + + TF_ASSIGN_OR_RETURN(Shape inferred_shape, ShapeInference::InferAllReduceShape(operand_shapes)); - *instr.mutable_shape() = shape.ToProto(); + if (shape_with_layout) { + if (!LayoutUtil::HasLayout(*shape_with_layout)) { + return InvalidArgument("shape_with_layout must have the layout set: %s", + shape_with_layout->ToString()); + } + if (!ShapeUtil::Compatible(*shape_with_layout, *operand_shape)) { + return InvalidArgument( + "Provided shape_with_layout must be compatible with the " + "operand shape: %s vs %s", + shape_with_layout->ToString(), operand_shape->ToString()); + } + instr.set_constrain_layout(true); + if (operand_shape->IsTuple() && !inferred_shape.IsTuple()) { + // For a single-element tuple, take the tuple element shape. 
+ TF_RET_CHECK(shape_with_layout->tuple_shapes_size() == 1); + *instr.mutable_shape() = shape_with_layout->tuple_shapes(0).ToProto(); + } else { + *instr.mutable_shape() = shape_with_layout->ToProto(); + } + } else { + *instr.mutable_shape() = inferred_shape.ToProto(); + } for (const ReplicaGroup& group : replica_groups) { *instr.add_replica_groups() = group; @@ -2153,10 +2176,10 @@ XlaOp XlaBuilder::AllReduce(XlaOp operand, const XlaComputation& computation, TF_ASSIGN_OR_RETURN( auto all_reduce, AddInstruction(std::move(instr), HloOpcode::kAllReduce, operands)); - if (operand_shape->IsTuple() && !shape.IsTuple()) { + if (operand_shape->IsTuple() && !inferred_shape.IsTuple()) { // For a single-element tuple, wrap the result into a tuple. TF_RET_CHECK(operand_shapes.size() == 1); - TF_RET_CHECK(ShapeUtil::Compatible(*operand_shapes[0], shape)); + TF_RET_CHECK(ShapeUtil::Compatible(*operand_shapes[0], inferred_shape)); return Tuple({all_reduce}); } return all_reduce; @@ -3282,9 +3305,10 @@ XlaOp CrossReplicaSum(const XlaOp operand, XlaOp AllReduce(const XlaOp operand, const XlaComputation& computation, absl::Span replica_groups, - const absl::optional& channel_id) { + const absl::optional& channel_id, + const absl::optional& shape_with_layout) { return operand.builder()->AllReduce(operand, computation, replica_groups, - channel_id); + channel_id, shape_with_layout); } XlaOp AllToAll(const XlaOp operand, int64 split_dimension, diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h index 3822e907203..5e93bb2b3ba 100644 --- a/tensorflow/compiler/xla/client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_builder.h @@ -514,7 +514,8 @@ class XlaBuilder { XlaOp AllReduce( XlaOp operand, const XlaComputation& computation, absl::Span replica_groups = {}, - const absl::optional& channel_id = absl::nullopt); + const absl::optional& channel_id = absl::nullopt, + const absl::optional& shape_with_layout = absl::nullopt); XlaOp AllToAll(XlaOp operand, int64 split_dimension, int64 concat_dimension, int64 split_count, @@ -922,7 +923,8 @@ class XlaBuilder { absl::Span replica_groups); friend XlaOp AllReduce(XlaOp operand, const XlaComputation& computation, absl::Span replica_groups, - const absl::optional& channel_id); + const absl::optional& channel_id, + const absl::optional& shape_with_layout); friend XlaOp AllToAll(XlaOp operand, int64 split_dimension, int64 concat_dimension, int64 split_count, const std::vector& replica_groups); @@ -1666,10 +1668,14 @@ XlaOp CrossReplicaSum(XlaOp operand, // - `channel_id`: for Allreduce nodes from different modules, if they have the // same channel_id, they will be 'AllReduce'd. If empty, AllReduce will not be // applied cross modules. -XlaOp AllReduce( - XlaOp operand, const XlaComputation& computation, - absl::Span replica_groups = {}, - const absl::optional& channel_id = absl::nullopt); +// +// - `shape_with_layout`: forces the layout of the AllReduce to the given +// layout. This is used to guarantee the same layout for a group of AllReduce +// ops compiled separately. +XlaOp AllReduce(XlaOp operand, const XlaComputation& computation, + absl::Span replica_groups = {}, + const absl::optional& channel_id = absl::nullopt, + const absl::optional& shape_with_layout = absl::nullopt); // Enqueues an operation that do an Alltoall of the operand cross cores. 
XlaOp AllToAll(XlaOp operand, int64 split_dimension, int64 concat_dimension, diff --git a/tensorflow/compiler/xla/literal.cc b/tensorflow/compiler/xla/literal.cc index 3a219673304..bbd640f6064 100644 --- a/tensorflow/compiler/xla/literal.cc +++ b/tensorflow/compiler/xla/literal.cc @@ -38,6 +38,7 @@ limitations under the License. #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/mem.h" #include "tensorflow/core/platform/types.h" namespace xla { @@ -131,18 +132,23 @@ void Literal::SetPiece(const Shape& shape, Piece* piece, bool allocate_arrays) { } } else if (shape.IsArray()) { if (allocate_arrays) { + // Literals can be used as DMA targets, which can require alignment. We + // force a 16-byte minimum alignment. + constexpr int kMinimumAlignment = 16; if (LayoutUtil::IsSparseArray(shape)) { // For sparse arrays, the buffer must be of the size of the maximum // number of sparse elements possible. const int64 max_sparse_elements = LayoutUtil::MaxSparseElements(shape.layout()); - piece->set_buffer( - new char[max_sparse_elements * - ShapeUtil::ByteSizeOfPrimitiveType(shape.element_type())]); + piece->set_buffer(static_cast(tensorflow::port::AlignedMalloc( + max_sparse_elements * + ShapeUtil::ByteSizeOfPrimitiveType(shape.element_type()), + kMinimumAlignment))); piece->set_sparse_indices( new SparseIndexArray(max_sparse_elements, shape.rank())); } else { - piece->set_buffer(new char[piece->size_bytes()]); + piece->set_buffer(static_cast(tensorflow::port::AlignedMalloc( + piece->size_bytes(), kMinimumAlignment))); } } } else { @@ -174,7 +180,7 @@ void Literal::DeallocateBuffers() { root_piece_->ForEachMutableSubpiece( [&](const ShapeIndex& index, Piece* piece) { if (piece->buffer() != nullptr) { - delete[] piece->buffer(); + tensorflow::port::AlignedFree(piece->buffer()); delete piece->sparse_indices(); } }); @@ -504,7 +510,7 @@ Status Literal::MoveFrom(Literal&& src_literal, dest_index.push_back(i); } Piece& dest_piece = piece(dest_index); - delete[] dest_piece.buffer(); + tensorflow::port::AlignedFree(dest_piece.buffer()); dest_piece.set_buffer(src_piece.buffer()); delete dest_piece.sparse_indices(); dest_piece.set_sparse_indices(src_piece.sparse_indices()); diff --git a/tensorflow/compiler/xla/python/BUILD b/tensorflow/compiler/xla/python/BUILD index 409d954748c..cdbe69d617e 100644 --- a/tensorflow/compiler/xla/python/BUILD +++ b/tensorflow/compiler/xla/python/BUILD @@ -26,7 +26,6 @@ py_test( name = "xla_client_test", srcs = ["xla_client_test.py"], main = "xla_client_test.py", - python_version = "PY3", srcs_version = "PY2AND3", tags = ["no_oss"], # TODO(phawkins): This test passes, but requires --config=monolithic. deps = [ diff --git a/tensorflow/compiler/xla/python/tpu_driver/BUILD b/tensorflow/compiler/xla/python/tpu_driver/BUILD index 96c6636323b..99a07c31256 100644 --- a/tensorflow/compiler/xla/python/tpu_driver/BUILD +++ b/tensorflow/compiler/xla/python/tpu_driver/BUILD @@ -31,6 +31,11 @@ tf_proto_library_cc( use_grpc_namespace = True, ) +cc_library( + name = "c_api", + hdrs = ["c_api.h"], +) + cc_library( name = "tpu_driver", srcs = [ diff --git a/tensorflow/compiler/xla/python/tpu_driver/c_api.h b/tensorflow/compiler/xla/python/tpu_driver/c_api.h new file mode 100644 index 00000000000..5b892dfdaa3 --- /dev/null +++ b/tensorflow/compiler/xla/python/tpu_driver/c_api.h @@ -0,0 +1,30 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_TPU_DRIVER_C_API_H_ +#define TENSORFLOW_COMPILER_XLA_PYTHON_TPU_DRIVER_C_API_H_ + +#define TPUDRIVER_CAPI_EXPORT __attribute__((visibility("default"))) + +extern "C" { + +TPUDRIVER_CAPI_EXPORT extern void TpuDriver_Initialize(); + +TPUDRIVER_CAPI_EXPORT extern void TpuDriver_Open(const char* worker); + +TPUDRIVER_CAPI_EXPORT extern const char* TpuDriver_Version(void); +} + +#endif // TENSORFLOW_COMPILER_XLA_PYTHON_TPU_DRIVER_C_API_H_ diff --git a/tensorflow/compiler/xla/python/tpu_driver/client/c_api_client.c b/tensorflow/compiler/xla/python/tpu_driver/client/c_api_client.c new file mode 100644 index 00000000000..70ab4af85fd --- /dev/null +++ b/tensorflow/compiler/xla/python/tpu_driver/client/c_api_client.c @@ -0,0 +1,50 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +// To compile: gcc -o c_api_client c_api_client.c -ldl +// To run, make sure c_api.so and c_api_client in the same directory, and then +// sudo ./c_api_client + +#include +#include +#include + +int main(int argc, char** argv) { + void* handle; + handle = dlopen("./c_api.so", RTLD_NOW); + if (!handle) { + fprintf(stderr, "Error: %s\n", dlerror()); + exit(EXIT_FAILURE); + } + + const char* (*TpuDriver_Version)(void); + void (*TpuDriver_Initialize)(void); + void (*TpuDriver_Open)(const char* worker); + + fprintf(stdout, "------ Going to Find Out Version ------\n"); + *(void**)(&TpuDriver_Version) = dlsym(handle, "TpuDriver_Version"); + fprintf(stdout, "TPU Driver Version: %s\n", TpuDriver_Version()); + + fprintf(stdout, "------ Going to Initialize ------\n"); + *(void**)(&TpuDriver_Initialize) = dlsym(handle, "TpuDriver_Initialize"); + TpuDriver_Initialize(); + + fprintf(stdout, "------ Going to Open a TPU Driver ------\n"); + *(void**)(&TpuDriver_Open) = dlsym(handle, "TpuDriver_Open"); + TpuDriver_Open("local://"); + + dlclose(handle); + exit(EXIT_SUCCESS); +} diff --git a/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.py b/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.py index 43c0d1a40c3..a3ad8b117ef 100644 --- a/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.py +++ b/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client.py @@ -104,6 +104,9 @@ class TpuBackend(xla_client.Backend): options, self.client, compile_options.device_assignment) + def get_default_device_assignment(self, num_replicas): + return self.client.GetDefaultDeviceAssignment(num_replicas) + def serialize(self, executable): return self.client.SerializeExecutable(executable) diff --git a/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client_extension.cc b/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client_extension.cc index e7d1e2ef9d9..60886416a62 100644 --- a/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client_extension.cc +++ b/tensorflow/compiler/xla/python/tpu_driver/client/tpu_client_extension.cc @@ -32,6 +32,21 @@ PYBIND11_MODULE(tpu_client_extension, m) { .def("devices", &PyTpuClient::devices) .def("local_devices", &PyTpuClient::local_devices) .def("host_id", &PyTpuClient::host_id) + .def("GetDefaultDeviceAssignment", + [](PyTpuClient* client, int num_replicas) + -> StatusOr>> { + TF_ASSIGN_OR_RETURN( + DeviceAssignment device_assignment, + client->GetDefaultDeviceAssignment(num_replicas)); + std::vector> result; + for (int i = 0; i < num_replicas; ++i) { + int device_id = device_assignment(i, 0); + auto iter = client->id_to_device().find(device_id); + CHECK(iter != client->id_to_device().end()) << device_id; + result.push_back(iter->second); + } + return result; + }) .def("TransferToInfeed", [](PyTpuClient* client, const LiteralSlice& literal, int device_ordinal) { @@ -189,6 +204,11 @@ PYBIND11_MODULE(tpu_client_extension, m) { py::call_guard(), py::arg("arguments")) .def("ExecutePerReplica", &PyTpuExecutable::ExecutePerReplica, py::call_guard(), py::arg("arguments")); + + py::class_>(m, "TpuDevice") + .def("__repr__", [](const TpuDevice& device) { + return absl::StrFormat("TpuDevice(id=%i)", device.id()); + }); } // NOLINT(readability/fn_size) } // namespace xla diff --git a/tensorflow/compiler/xla/python/xla.cc b/tensorflow/compiler/xla/python/xla.cc index 054c1da9e03..13968154188 100644 --- a/tensorflow/compiler/xla/python/xla.cc +++ 
b/tensorflow/compiler/xla/python/xla.cc @@ -366,6 +366,21 @@ PYBIND11_MODULE(xla_extension, m) { .def("devices", &PyLocalClient::devices) .def("local_devices", &PyLocalClient::local_devices) .def("host_id", &PyLocalClient::host_id) + .def("GetDefaultDeviceAssignment", + [](PyLocalClient* client, int num_replicas) + -> StatusOr>> { + TF_ASSIGN_OR_RETURN( + DeviceAssignment device_assignment, + client->GetDefaultDeviceAssignment(num_replicas)); + std::vector> result; + for (int i = 0; i < num_replicas; ++i) { + int device_id = device_assignment(i, 0); + auto iter = client->id_to_device().find(device_id); + CHECK(iter != client->id_to_device().end()) << device_id; + result.push_back(iter->second); + } + return result; + }) .def("TransferToInfeed", [](PyLocalClient* client, const LiteralSlice& literal, int device_ordinal) { @@ -624,10 +639,12 @@ PYBIND11_MODULE(xla_extension, m) { py::module ops = m.def_submodule("ops", "XLA operations"); ops.def("AfterAll", &AfterAll); - ops.def("AllReduce", - static_cast, - const absl::optional&)>(&AllReduce)); + ops.def( + "AllReduce", + static_cast, + const absl::optional&, const absl::optional&)>( + &AllReduce)); ops.def("AllToAll", &AllToAll); ops.def("CollectivePermute", &CollectivePermute); ops.def("CreateToken", &CreateToken); diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index c8f66f704d7..a7e35a8a81f 100644 --- a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -91,6 +91,23 @@ class Backend(object): def compile(self, computation, compile_options): """Compiles a computation. Returns an executable.""" + @abc.abstractmethod + def get_default_device_assignment(self, num_replicas): + """Returns the default device assignment that `compile` would use. + + If `compile_options.device_assignment` isn't set, `compile` will pick a + deterministic device assignment based on the number of replicas, possibly + optimizing for device locality. This method returns that assignment, which + is useful for e.g. manually replicating a value before passing it to a + compiled executable. + + Args: + num_replicas: the number of replicas needed. + + Returns: + A list of Devices of length `num_replicas` indexed by replica ID. 
+ """ + class LocalBackend(Backend): """XLA backend implemented using the in-process xla::LocalClient API.""" @@ -143,6 +160,9 @@ class LocalBackend(Backend): options, self.client, compile_options.device_assignment) + def get_default_device_assignment(self, num_replicas): + return self.client.GetDefaultDeviceAssignment(num_replicas) + def serialize(self, executable): return self.client.SerializeExecutable(executable) @@ -1014,7 +1034,7 @@ class ComputationBuilder(object): """ replica_groups_protos = _get_replica_groups_protos(replica_groups) return ops.AllReduce(operand, computation.computation, - replica_groups_protos, None) + replica_groups_protos, None, None) def AllToAll(self, operand, diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index a6300d2dc73..14e6f66741e 100755 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -1505,6 +1505,7 @@ cc_library( hdrs = ["hlo_query.h"], deps = [ ":hlo", + ":hlo_casting_utils", "//tensorflow/compiler/xla:literal", "//tensorflow/compiler/xla:shape_util", "@com_google_absl//absl/container:flat_hash_set", diff --git a/tensorflow/compiler/xla/service/bfloat16_conversion_folding_test.cc b/tensorflow/compiler/xla/service/bfloat16_conversion_folding_test.cc index eb6692ade5b..ac5edd82bee 100644 --- a/tensorflow/compiler/xla/service/bfloat16_conversion_folding_test.cc +++ b/tensorflow/compiler/xla/service/bfloat16_conversion_folding_test.cc @@ -239,6 +239,7 @@ TEST_F(BFloat16ConversionFoldingTest, FoldAllReduceTupleOutput) { HloInstruction* crs = builder.AddInstruction(HloInstruction::CreateAllReduce( ShapeUtil::MakeTupleShape({f32_shape, f32_shape}), {convert_a, b}, sum, /*replica_groups=*/{}, + /*constrain_layout=*/false, /*channel_id=*/absl::nullopt)); HloInstruction* gte_a = builder.AddInstruction( HloInstruction::CreateGetTupleElement(f32_shape, crs, 0)); diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc index f7a5ee691f3..ec93a868022 100644 --- a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc +++ b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc @@ -259,6 +259,7 @@ TEST_F(BFloat16NormalizationTest, ResolveMixedPrecisionTupleAllReduce) { HloInstruction* crs = builder.AddInstruction(HloInstruction::CreateAllReduce( ShapeUtil::MakeTupleShape({f32_shape, bf16_shape}), {a, b}, reduction, /*replica_groups=*/{}, + /*constrain_layout=*/false, /*channel_id=*/absl::nullopt)); builder.AddInstruction( HloInstruction::CreateGetTupleElement(bf16_shape, crs, 1)); diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc b/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc index d716e62d467..aee1f652abd 100644 --- a/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc +++ b/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc @@ -211,7 +211,8 @@ TEST_F(BFloat16PropagationTest, DoNotChangeAllReduce) { HloInstruction* all_reduce = builder.AddInstruction(HloInstruction::CreateAllReduce( ShapeUtil::MakeTupleShape({shape, shape}), {a, b}, reduction, - /*replica_groups=*/{}, /*channel_id=*/1)); + /*replica_groups=*/{}, /*constrain_layout=*/false, + /*channel_id=*/1)); HloInstruction* gte0 = builder.AddInstruction( HloInstruction::CreateGetTupleElement(shape, all_reduce, 0)); HloInstruction* gte1 = builder.AddInstruction( diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h 
b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h index c5ed810c917..37a54f86d3d 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h +++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h @@ -22,7 +22,9 @@ limitations under the License. #include "tensorflow/compiler/xla/service/dfs_hlo_visitor.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/status.h" @@ -254,6 +256,16 @@ using ConstDfsHloVisitorWithDefault = // visiting. class DfsHloRewriteVisitor : public DfsHloVisitorWithDefault { public: + // Runs a visitor on the module and returns whether the module has changed. + StatusOr RunOnModule(HloModule* module) { + bool is_changed = false; + for (const auto& computation : module->computations()) { + TF_RETURN_IF_ERROR(computation->Accept(this)); + is_changed |= changed(); + } + return is_changed; + } + // Default visitor action is to do nothing and return OK. Status DefaultAction(HloInstruction* /*hlo_instruction*/) override { return Status::OK(); diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 9634401fe96..eb8b848fc3f 100755 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -1196,6 +1196,9 @@ cc_library( ":gpu_conv_padding_legalization", ":gpu_conv_rewriter", ":gpu_layout_assignment", + ":reduction_degenerate_dim_remover", + ":reduction_dimension_grouper", + ":reduction_layout_normalizer", ":stream_executor_util", ":target_constants", "//tensorflow/compiler/xla:status_macros", @@ -1664,3 +1667,66 @@ tf_cc_test( "//tensorflow/core:test", ], ) + +cc_library( + name = "reduction_degenerate_dim_remover", + srcs = ["reduction_degenerate_dim_remover.cc"], + hdrs = ["reduction_degenerate_dim_remover.h"], + deps = [ + ":ir_emission_utils", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_casting_utils", + "//tensorflow/compiler/xla/service:hlo_pass", + "//tensorflow/compiler/xla/service:pattern_matcher", + "//tensorflow/core:lib", + "//tensorflow/stream_executor/lib", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", + ], +) + +cc_library( + name = "reduction_dimension_grouper", + srcs = ["reduction_dimension_grouper.cc"], + hdrs = ["reduction_dimension_grouper.h"], + deps = [ + ":ir_emission_utils", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_casting_utils", + "//tensorflow/compiler/xla/service:hlo_pass", + "//tensorflow/compiler/xla/service:pattern_matcher", + "//tensorflow/core:lib", + "//tensorflow/stream_executor/lib", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", + ], +) + +cc_library( + name = "reduction_layout_normalizer", + srcs = ["reduction_layout_normalizer.cc"], + hdrs = 
["reduction_layout_normalizer.h"], + deps = [ + ":ir_emission_utils", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_casting_utils", + "//tensorflow/compiler/xla/service:hlo_pass", + "//tensorflow/compiler/xla/service:pattern_matcher", + "//tensorflow/core:lib", + "//tensorflow/stream_executor/lib", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", + ], +) diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc old mode 100755 new mode 100644 index 6404c6d826f..30b204e6fd5 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -161,6 +161,12 @@ Status GpuCompiler::OptimizeHloModule( // where possible. Not every batchnorm op can be implemented as a call to // cudnn, so decompose any remaining batchnorm ops into a soup of HLOs. if (hlo_module->config().debug_options().xla_gpu_use_cudnn_batchnorm()) { + // Since BatchNorm inference is essentially pointwise operations, it is + // always advantageous to use kernel fusion rather than cudnn. + pass.AddPass( + /*rewrite_training_op=*/false, + /*rewrite_inference_op=*/true, + /*rewrite_grad_op=*/false); pass.AddPass(); } pass.AddPass( diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc index 72f69ca2017..b2067fe916d 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc @@ -124,6 +124,24 @@ bool IsCublasGemm(const HloInstruction& hlo) { hlo.custom_call_target() == kGemmCallTarget; } +std::array GetReductionTiling( + const ReductionDimensions& reduction_dimensions) { + if (reduction_dimensions.is_row_reduction) { + int64 tile_z = std::min(reduction_dimensions.dimensions[0], 8LL); + if (reduction_dimensions.dimensions[1] == 1) { + CHECK_EQ(reduction_dimensions.dimensions[0], 1); + return {tile_z, 1, 16}; + } + if (reduction_dimensions.dimensions[2] % (kWarpSize * 64) == 0) { + return {tile_z, 1, 64}; + } + return {tile_z, 1, 8}; + } + + // Column reduction. 
+ return {1, 128, 1}; +} + const char* const kCudnnBatchNormForwardInferenceCallTarget = "__cudnn$batchNormalizationForwardInference"; const char* const kCudnnBatchNormForwardTrainingCallTarget = @@ -201,8 +219,7 @@ bool IsReductionFromOrToContiguousDimensions(const HloInstruction& reduce) { } ReductionDimensions reduction_dimensions = - GetReductionKindAndContiguousComponents(input->shape(), - reduce.dimensions()); + GetReductionKindAndContiguousComponents(reduce); if (reduction_dimensions.is_row_reduction) { // For row reduction, the tile block is 1 x tile_size_x, and we are reducing @@ -218,7 +235,9 @@ bool IsReductionFromOrToContiguousDimensions(const HloInstruction& reduce) { } ReductionDimensions GetReductionKindAndContiguousComponents( - const Shape& input_shape, absl::Span dims_to_reduce) { + const HloInstruction& reduce) { + const Shape& input_shape = reduce.operand(0)->shape(); + absl::Span dims_to_reduce = reduce.dimensions(); DimensionVector dims_to_keep; for (int64 dim = 0; dim < input_shape.rank(); ++dim) { if (!absl::c_linear_search(dims_to_reduce, dim)) { diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h index db3cd228841..2c37a63c05a 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h @@ -169,14 +169,17 @@ struct ReductionDimensions { std::array dimensions; }; -// Given the input shape and dimensions to reduce for a reduction, returns -// ReductionDimensions. +// Given the reduction operation, returns ReductionDimensions. // // Prerequisite: the reduction instruction passes the check // IsReductionFromOrToContiguousDimensions, which guarantees either the // dimensions to reduce or the dimensions to keep are consecutive. ReductionDimensions GetReductionKindAndContiguousComponents( - const Shape& input_shape, absl::Span dims_to_reduce); + const HloInstruction& reduce); + +// Get tiling per thread for the given reduction in dimensions [D, H, W]. +std::array GetReductionTiling( + const ReductionDimensions& reduction_dimensions); // Emits call to "vprintf" with given format and arguments. llvm::Value* EmitPrintf(absl::string_view fmt, diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index dbc2c95773a..2f8fd5e01cf 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -1988,10 +1988,11 @@ static int GetNumberOfPartialResults( if (reduction_info.IsRowReduction()) { return 1; } - int64 num_thread = mapping_scheme.GetNumberOfThreadsForDimensionX(); - int64 tile_size = mapping_scheme.GetTileSizeForDimensionX(); - CHECK_EQ(tile_size % num_thread, 0); - return tile_size / num_thread; + int64 num_partial_results = mapping_scheme.DilatedX() ? 
1 : 2; + CHECK_EQ(num_partial_results, + (mapping_scheme.GetTileSizeForDimensionX() / + mapping_scheme.GetNumberOfThreadsForDimensionX())); + return num_partial_results; } void IrEmitterUnnested::EmitPrologueForOneReduction( @@ -2876,36 +2877,26 @@ ReductionCodegenInfo IrEmitterUnnested::ComputeReductionCodegenInfo( const HloInstruction* unnested_hlo, const HloInstruction* first_reduce) { const Shape& input_shape = first_reduce->operand(0)->shape(); ReductionDimensions reduction_dimensions = - GetReductionKindAndContiguousComponents(input_shape, - first_reduce->dimensions()); + GetReductionKindAndContiguousComponents(*first_reduce); VLOG(10) << "is_row_reduction " << reduction_dimensions.is_row_reduction << " " << reduction_dimensions.dimensions[0] << " " << reduction_dimensions.dimensions[1] << " " << reduction_dimensions.dimensions[2]; + std::array reduction_tiling = + GetReductionTiling(reduction_dimensions); + int64 tile_size_y = reduction_tiling[1]; + int64 block_size_z = reduction_tiling[0]; + bool dilated_x = + !reduction_dimensions.is_row_reduction && + !IsUnrollingColumnReductionBeneficial(unnested_hlo, input_shape, + reduction_dimensions.dimensions[2]); + int64 tile_size_x = 1; - int64 tile_size_y = 1; - int64 block_size_z = 1; int64 num_threads_x = 1; - bool dilated_x = true; if (reduction_dimensions.is_row_reduction) { num_threads_x = kWarpSize; - if (reduction_dimensions.dimensions[1] == 1) { - // Scalar reduction is handled differently than the other kind of row - // reduction. - CHECK_EQ(reduction_dimensions.dimensions[0], 1); - tile_size_x = kWarpSize * 16; - } else { - if (reduction_dimensions.dimensions[2] % (kWarpSize * 64) == 0) { - tile_size_x = kWarpSize * 64; - } else { - tile_size_x = kWarpSize * 8; - block_size_z = 8; - while (reduction_dimensions.dimensions[0] % block_size_z != 0) { - block_size_z -= 1; - } - } - } + tile_size_x = reduction_tiling[2] * kWarpSize; } else { // Column reduction without transpose doesn't require communication among // threads processing elements in the same tile. The current implementation @@ -2915,20 +2906,17 @@ ReductionCodegenInfo IrEmitterUnnested::ComputeReductionCodegenInfo( // num_threads_x and tile_size_x to allow a bigger hardware thread block. int64 hw_threads_per_block_limit = ThreadsPerBlockLimit(ir_emitter_context_->device_description()); - if (IsUnrollingColumnReductionBeneficial( - unnested_hlo, input_shape, reduction_dimensions.dimensions[2])) { + if (!dilated_x) { // Vectorized loads: two elements per thread. tile_size_x = std::min(2 * hw_threads_per_block_limit, reduction_dimensions.dimensions[2]); num_threads_x = tile_size_x / 2; - dilated_x = false; } else { // One element per thread. 
tile_size_x = std::min(hw_threads_per_block_limit, reduction_dimensions.dimensions[2]); num_threads_x = tile_size_x; } - tile_size_y = 128; } KernelMappingScheme mapping_scheme( diff --git a/tensorflow/compiler/xla/service/gpu/kernel_mapping_scheme.h b/tensorflow/compiler/xla/service/gpu/kernel_mapping_scheme.h index 345abbd0935..2eede7036cf 100644 --- a/tensorflow/compiler/xla/service/gpu/kernel_mapping_scheme.h +++ b/tensorflow/compiler/xla/service/gpu/kernel_mapping_scheme.h @@ -85,15 +85,14 @@ class KernelMappingScheme { dims_in_tiles_{dims_in_elems[0], CeilOfRatio(dims_in_elems[1], tile_size_y), CeilOfRatio(dims_in_elems[2], tile_size_x)}, - dims_in_blocks_{dims_in_tiles_[0] / block_size_z, dims_in_tiles_[1], - dims_in_tiles_[2]}, + dims_in_blocks_{CeilOfRatio(dims_in_tiles_[0], block_size_z), + dims_in_tiles_[1], dims_in_tiles_[2]}, block_size_z_{block_size_z}, num_threads_x_(num_threads_x), num_threads_y_(num_threads_y), dilated_x_(is_dilated_x) { CHECK_EQ(tile_size_y % num_threads_y_, 0); CHECK_EQ(tile_size_x % num_threads_x_, 0); - CHECK_EQ((dims_in_elems[0] % block_size_z), 0); VLOG(10) << "dims_in_elems_ = " << absl::StrJoin(dims_in_elems_, ","); VLOG(10) << "dims_in_tiles_ = " << absl::StrJoin(dims_in_tiles_, ","); VLOG(10) << "dims_in_blocks_ = " << absl::StrJoin(dims_in_blocks_, ","); diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc index 489cbd101e2..6635b68899d 100755 --- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc @@ -32,6 +32,9 @@ limitations under the License. #include "tensorflow/compiler/xla/service/gpu/gpu_conv_rewriter.h" #include "tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h" #include "tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.h" +#include "tensorflow/compiler/xla/service/gpu/reduction_degenerate_dim_remover.h" +#include "tensorflow/compiler/xla/service/gpu/reduction_dimension_grouper.h" +#include "tensorflow/compiler/xla/service/gpu/reduction_layout_normalizer.h" #include "tensorflow/compiler/xla/service/gpu/stream_executor_util.h" #include "tensorflow/compiler/xla/service/gpu/target_constants.h" #include "tensorflow/compiler/xla/service/hlo_constant_folding.h" @@ -154,6 +157,10 @@ Status NVPTXCompiler::OptimizeHloPostLayoutAssignment( /*allow_mixed_precision=*/false, LayoutAssignment::InstructionCanChangeLayout); + pipeline.AddPass(); + pipeline.AddPass(); + pipeline.AddPass(); + // The LayoutAssignment pass may leave behind kCopy instructions which are // duplicate or NOPs, so remove them with algebraic simplification and CSE. AlgebraicSimplifierOptions options; diff --git a/tensorflow/compiler/xla/service/gpu/reduction_degenerate_dim_remover.cc b/tensorflow/compiler/xla/service/gpu/reduction_degenerate_dim_remover.cc new file mode 100644 index 00000000000..2c786b577fc --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/reduction_degenerate_dim_remover.cc @@ -0,0 +1,92 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/gpu/reduction_degenerate_dim_remover.h" + +#include + +#include "absl/algorithm/container.h" +#include "absl/strings/str_join.h" +#include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" +#include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" +#include "tensorflow/compiler/xla/service/hlo_casting_utils.h" +#include "tensorflow/compiler/xla/service/hlo_instructions.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/pattern_matcher.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/stream_executor/lib/statusor.h" + +namespace xla { +namespace gpu { + +class ReductionDegenerateDimRemoverVisitor : public DfsHloRewriteVisitor { + public: + Status HandleReduce(HloInstruction *instr) override { + HloInstruction *reduced_op = instr->mutable_operand(0); + const Shape &input_shape = reduced_op->shape(); + const Shape &reduce_shape = instr->shape(); + + if (!instr->shape().IsArray() || + !ShapeUtil::HasDegenerateDimensions(reduced_op->shape())) { + return Status::OK(); + } + Shape canonical_input_shape = + ShapeUtil::DropDegenerateDimensions(input_shape); + + Shape canonical_reduce_shape = + ShapeUtil::DropDegenerateDimensions(reduce_shape); + + const std::vector &reduced_dimensions = instr->dimensions(); + std::vector updated_reduced_dimensions; + int64 shift = 0; + + for (int dim = 0; dim < input_shape.rank(); dim++) { + if (input_shape.dimensions(dim) == 1) { + shift++; + } else { + if (absl::c_linear_search(reduced_dimensions, dim)) { + updated_reduced_dimensions.push_back(dim - shift); + } + } + } + + HloInstruction *input_reshape = instr->parent()->AddInstruction( + HloInstruction::CreateBitcast(canonical_input_shape, reduced_op)); + + std::unique_ptr new_reduce = HloInstruction::CreateReduce( + canonical_reduce_shape, input_reshape, instr->mutable_operand(1), + updated_reduced_dimensions, instr->to_apply()); + + if (canonical_reduce_shape != reduce_shape) { + HloInstruction *wrapped_reduce = + instr->parent()->AddInstruction(std::move(new_reduce)); + new_reduce = HloInstruction::CreateBitcast(reduce_shape, wrapped_reduce); + } + + return ReplaceWithNewInstruction(instr, std::move(new_reduce)); + } +}; + +StatusOr ReductionDegenerateDimRemover::Run(HloModule *module) { + TF_ASSIGN_OR_RETURN( + bool changed, ReductionDegenerateDimRemoverVisitor().RunOnModule(module)); + return changed; +} + +} // namespace gpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/reduction_degenerate_dim_remover.h b/tensorflow/compiler/xla/service/gpu/reduction_degenerate_dim_remover.h new file mode 100644 index 00000000000..eeb26da607a --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/reduction_degenerate_dim_remover.h @@ -0,0 +1,53 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_REDUCTION_DEGENERATE_DIM_REMOVER_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_REDUCTION_DEGENERATE_DIM_REMOVER_H_ + +#include "absl/types/optional.h" +#include "tensorflow/compiler/xla/service/hlo_instructions.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" + +namespace xla { +namespace gpu { + +// Enforces the invariant that reduction input and output have no degenerate +// (size 1) dimension. Since these dimensions are physically meaningless, they +// are removed using bitcasts. +// +// For example, +// +// f[1] out = reduce(f[100, 1, 1] input, dimensions={0, 1}) +// +// becomes: +// +// +// f[100] tmp1 = f[100] bitcast(f[100, 1, 1], input) +// f[] tmp2 = reduce(f[100] tmp1, dimensions={0}) +// f[1] out = f[] bitcast(tmp2) +// +class ReductionDegenerateDimRemover : public HloModulePass { + public: + absl::string_view name() const override { + return "reduction-degenerate-dim-remover"; + } + + StatusOr Run(HloModule* module) override; +}; + +} // namespace gpu +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_REDUCTION_DEGENERATE_DIM_REMOVER_H_ diff --git a/tensorflow/compiler/xla/service/gpu/reduction_dimension_grouper.cc b/tensorflow/compiler/xla/service/gpu/reduction_dimension_grouper.cc new file mode 100644 index 00000000000..66b458e1ba4 --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/reduction_dimension_grouper.cc @@ -0,0 +1,107 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/gpu/reduction_dimension_grouper.h" + +#include + +#include "absl/algorithm/container.h" +#include "absl/strings/str_join.h" +#include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" +#include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" +#include "tensorflow/compiler/xla/service/hlo_casting_utils.h" +#include "tensorflow/compiler/xla/service/hlo_instructions.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/pattern_matcher.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/stream_executor/lib/statusor.h" + +namespace xla { +namespace gpu { + +class ReduceDimensionGroupVisitor : public DfsHloRewriteVisitor { + public: + Status HandleReduce(HloInstruction *reduce) override { + VLOG(4) << "Input: " << reduce->ToString(); + + if (!reduce->shape().IsArray()) { + // TODO(cheshire): Handle variadic reduction. + return Status::OK(); + } + + std::vector new_grouped_dims; + std::vector reduced_dims_grouped; + HloInstruction *operand = reduce->mutable_operand(0); + const Shape &shape = operand->shape(); + CHECK(shape == LayoutUtil::GetWithDefaultLayout(shape)) + << "Default layout should be enforced on reduction operand"; + auto is_reduced = [&](int dim) { + return absl::c_linear_search(reduce->dimensions(), dim); + }; + + bool changed = false; + int64 next_dim_size = 1; + + // Since we have enforced the standard layout, iteration over logical + // dimensions is equivalent to iteration over the major-to-minor order. 
+ for (int logical_dim = 0; logical_dim < shape.rank(); logical_dim++) { + VLOG(5) << "Processing dimension " << logical_dim << " of size " + << shape.dimensions(logical_dim); + if (is_reduced(logical_dim) && logical_dim < shape.rank() - 1 && + is_reduced(logical_dim + 1)) { + VLOG(5) << "This and consecutive dimension are reduced, merging"; + changed = true; + next_dim_size *= shape.dimensions(logical_dim); + continue; + } + + if (is_reduced(logical_dim)) { + new_grouped_dims.push_back(next_dim_size * + shape.dimensions(logical_dim)); + reduced_dims_grouped.push_back(new_grouped_dims.size() - 1); + next_dim_size = 1; + } else { + new_grouped_dims.push_back(shape.dimensions(logical_dim)); + } + } + + if (!changed) { + return Status::OK(); + } + + Shape grouped_shape = + ShapeUtil::MakeShape(shape.element_type(), new_grouped_dims); + HloInstruction *reduce_input_grouped = reduce->parent()->AddInstruction( + HloInstruction::CreateBitcast(grouped_shape, operand)); + + std::unique_ptr new_reduce = HloInstruction::CreateReduce( + reduce->shape(), reduce_input_grouped, reduce->mutable_operand(1), + reduced_dims_grouped, reduce->to_apply()); + VLOG(5) << "Generated new reduction: " << new_reduce->ToString(); + return ReplaceWithNewInstruction(reduce, std::move(new_reduce)); + } +}; + +StatusOr ReductionDimensionGrouper::Run(HloModule *module) { + TF_ASSIGN_OR_RETURN(bool changed, + ReduceDimensionGroupVisitor().RunOnModule(module)); + return changed; +} + +} // namespace gpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/reduction_dimension_grouper.h b/tensorflow/compiler/xla/service/gpu/reduction_dimension_grouper.h new file mode 100644 index 00000000000..8a78d3fca07 --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/reduction_dimension_grouper.h @@ -0,0 +1,54 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_REDUCTION_DIMENSION_GROUPER_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_REDUCTION_DIMENSION_GROUPER_H_ + +#include "absl/types/optional.h" +#include "tensorflow/compiler/xla/service/hlo_instructions.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" + +namespace xla { +namespace gpu { + +// Groups adjacent (logically and physically) reduced dimensions in reduction +// input. +// +// Precondition: ReductionLayoutNormalizer has been run (physical proximity and +// logical proximity become the same). 
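For readers skimming the grouping visitor above: the core transformation just multiplies out runs of adjacent reduced dimensions, which is valid because ReductionLayoutNormalizer has already made logical and physical adjacency coincide. A simplified standalone sketch of that bookkeeping, not the XLA visitor itself (the struct and function names here are made up for illustration):

#include <cstdint>
#include <iostream>
#include <vector>

struct GroupedReduction {
  std::vector<int64_t> dims;          // grouped dimension sizes
  std::vector<int64_t> reduced_dims;  // indices into `dims` that are reduced
};

GroupedReduction GroupAdjacentReducedDims(const std::vector<int64_t>& dims,
                                          const std::vector<bool>& is_reduced) {
  GroupedReduction out;
  int64_t pending = 1;  // running product over a run of reduced dimensions
  for (size_t d = 0; d < dims.size(); ++d) {
    if (!is_reduced[d]) {
      out.dims.push_back(dims[d]);
      continue;
    }
    pending *= dims[d];
    if (d + 1 < dims.size() && is_reduced[d + 1]) continue;  // keep merging
    out.dims.push_back(pending);
    out.reduced_dims.push_back(static_cast<int64_t>(out.dims.size()) - 1);
    pending = 1;
  }
  return out;
}

int main() {
  // reduce(f[8,4,16], dimensions={0,1}) becomes reduce(f[32,16], dimensions={0}).
  GroupedReduction g = GroupAdjacentReducedDims({8, 4, 16}, {true, true, false});
  for (int64_t d : g.dims) std::cout << d << " ";  // prints: 32 16
  std::cout << "\n";
}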
+// +// For example, +// +// f[] out = reduce(f[10,20,30] input, dimensions={0,1,2}) +// +// becomes: +// +// f[600] tmp = f[600] bitcast(f[10,20,30] input) +// f[] out = reduce(f[600] tmp, dimensions={0}) +// +// TODO(cheshire): handle variadic reduction +class ReductionDimensionGrouper : public HloModulePass { + public: + absl::string_view name() const override { + return "reduction-dimension-grouper"; + } + + StatusOr Run(HloModule* module) override; +}; + +} // namespace gpu +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_REDUCTION_DIMENSION_GROUPER_H_ diff --git a/tensorflow/compiler/xla/service/gpu/reduction_layout_normalizer.cc b/tensorflow/compiler/xla/service/gpu/reduction_layout_normalizer.cc new file mode 100644 index 00000000000..295ccebd442 --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/reduction_layout_normalizer.cc @@ -0,0 +1,129 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/gpu/reduction_layout_normalizer.h" + +#include + +#include "absl/algorithm/container.h" +#include "absl/strings/str_join.h" +#include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" +#include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" +#include "tensorflow/compiler/xla/service/hlo_casting_utils.h" +#include "tensorflow/compiler/xla/service/hlo_instructions.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/pattern_matcher.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/stream_executor/lib/statusor.h" + +namespace xla { +namespace gpu { + +class EnforceMinorToMajorReduceOpVisitor : public DfsHloRewriteVisitor { + Status HandleReduce(HloInstruction *reduce) override { + VLOG(5) << "Input: " << reduce->ToString(); + HloInstruction *operand = reduce->mutable_operand(0); + const Shape &operand_shape = operand->shape(); + const Layout &operand_layout = operand_shape.layout(); + const Shape &reduce_shape = reduce->shape(); + + if (!reduce_shape.IsArray()) { + // TODO(cheshire): Handle variadic reduction. + return Status::OK(); + } + + std::vector new_reduce_dimensions; + std::vector new_operand_shape_data; + std::vector new_reduce_shape_data; + + // The layout order of the reduction output can be different to the + // ordering of kept dimensions in the input operand, thus we need to + // calculate the new layout. 
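+    // For example, for the reduction documented in
+    // reduction_layout_normalizer.h,
+    //
+    //   f[20,30]{0,1} out = reduce(f[10,20,30]{2,0,1} input, dimensions={0})
+    //
+    // the operand is bitcast to f[20,10,30]{2,1,0}, the reduced dimension
+    // becomes {1}, and the output keeps its requested layout {0,1}.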
+ std::vector new_reduce_shape_layout(reduce_shape.rank()); + std::vector reduce_shape_logical_to_physical = + LayoutUtil::MakeLogicalToPhysical(reduce_shape.layout()); + + auto to_reduce_logical_dim = [&](int64 op_logical_dim) { + return op_logical_dim - + absl::c_count_if(reduce->dimensions(), [&](int64 dim) { + CHECK(dim != op_logical_dim); + return dim < op_logical_dim; + }); + }; + + for (int i = 0; i < operand_shape.rank(); i++) { + // Process the dimensions in the major-to-minor order in order to enforce + // the default layout. + int64 major_to_minor_dim_idx = operand_shape.rank() - i - 1; + int64 logical_dim = operand_layout.minor_to_major(major_to_minor_dim_idx); + int64 dim_size = operand_shape.dimensions(logical_dim); + VLOG(5) << "Processing logical dimension " << logical_dim << " of size " + << dim_size; + new_operand_shape_data.push_back(dim_size); + + if (absl::c_linear_search(reduce->dimensions(), logical_dim)) { + new_reduce_dimensions.push_back(i); + } else { + new_reduce_shape_data.push_back(dim_size); + int64 logical_reduce_dim = to_reduce_logical_dim(logical_dim); + int64 physical_reduce_dim = + reduce_shape_logical_to_physical[logical_reduce_dim]; + VLOG(5) << "logical_reduce_dim = " << logical_reduce_dim << ", " + << "physical_reduce_dim = " << physical_reduce_dim; + new_reduce_shape_layout[reduce_shape.rank() - physical_reduce_dim - 1] = + new_reduce_shape_data.size() - 1; + } + } + + Shape new_operand_shape = ShapeUtil::MakeShape(operand_shape.element_type(), + new_operand_shape_data); + if (new_operand_shape == operand_shape) { + return Status::OK(); + } + + Shape new_reduce_shape = ShapeUtil::MakeShapeWithLayout( + reduce_shape.element_type(), new_reduce_shape_data, + new_reduce_shape_layout); + HloInstruction *canonical_reduce_input = reduce->parent()->AddInstruction( + HloInstruction::CreateBitcast(new_operand_shape, operand)); + + VLOG(5) << "Reduction input: " << canonical_reduce_input->ToString(); + std::unique_ptr new_reduce = HloInstruction::CreateReduce( + new_reduce_shape, canonical_reduce_input, reduce->mutable_operand(1), + new_reduce_dimensions, reduce->to_apply()); + VLOG(5) << "Generated new reduction: " << new_reduce->ToString(); + + if (new_reduce_shape != reduce_shape) { + HloInstruction *wrapped_reduce = + reduce->parent()->AddInstruction(std::move(new_reduce)); + new_reduce = HloInstruction::CreateBitcast(reduce_shape, wrapped_reduce); + } + + VLOG(5) << "Generated output: " << new_reduce->ToString(); + return ReplaceWithNewInstruction(reduce, std::move(new_reduce)); + } +}; + +StatusOr ReductionLayoutNormalizer::Run(HloModule *module) { + TF_ASSIGN_OR_RETURN(bool changed, + EnforceMinorToMajorReduceOpVisitor().RunOnModule(module)); + return changed; +} + +} // namespace gpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/reduction_layout_normalizer.h b/tensorflow/compiler/xla/service/gpu/reduction_layout_normalizer.h new file mode 100644 index 00000000000..d27c847f8ea --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/reduction_layout_normalizer.h @@ -0,0 +1,50 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_REDUCTION_LAYOUT_NORMALIZER_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_REDUCTION_LAYOUT_NORMALIZER_H_ + +#include "absl/types/optional.h" +#include "tensorflow/compiler/xla/service/hlo_instructions.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" + +namespace xla { +namespace gpu { + +// Enforces default (minor-to-major) layout on all reduction inputs. +// Note that since reduction output can request a custom layout, +// this pass only guarantees standard layout for the input. +// +// For example, +// +// f[20,30]{0,1} out = reduce(f[10,20,30]{2,0,1} input, dimensions={0}) +// +// becomes: +// +// f[20,10,30] tmp = f[20,10,30] bitcast(f[10,20,30]{2,0,1} input) +// f[20,30]{0,1} out = reduce(f[20,10,30]{2,1,0} tmp, dimensions={1}) +class ReductionLayoutNormalizer : public HloModulePass { + public: + absl::string_view name() const override { + return "reduction-layout-normalizer"; + } + + StatusOr Run(HloModule* module) override; +}; + +} // namespace gpu +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_REDUCTION_LAYOUT_NORMALIZER_H_ diff --git a/tensorflow/compiler/xla/service/gpu/tests/BUILD b/tensorflow/compiler/xla/service/gpu/tests/BUILD index 11cb5f0cbf7..51a12e1f2fe 100644 --- a/tensorflow/compiler/xla/service/gpu/tests/BUILD +++ b/tensorflow/compiler/xla/service/gpu/tests/BUILD @@ -81,6 +81,87 @@ tf_cc_test( ], ) +tf_cc_test( + name = "reduction_degenerate_dim_remover_test", + srcs = [ + "reduction_degenerate_dim_remover_test.cc", + ], + tags = tf_cuda_tests_tags(), + deps = [ + ":gpu_codegen_test", + "//tensorflow/compiler/xla:debug_options_flags", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/service:gpu_plugin", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_module_config", + "//tensorflow/compiler/xla/service:hlo_parser", + "//tensorflow/compiler/xla/service/gpu:gemm_rewriter", + "//tensorflow/compiler/xla/service/gpu:gpu_executable", + "//tensorflow/compiler/xla/tests:filecheck", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:llvm_irgen_test_base", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/stream_executor/lib", + "@com_google_absl//absl/memory", + ], +) + +tf_cc_test( + name = "reduction_layout_normalizer_test", + srcs = [ + "reduction_layout_normalizer_test.cc", + ], + tags = tf_cuda_tests_tags(), + deps = [ + ":gpu_codegen_test", + "//tensorflow/compiler/xla:debug_options_flags", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/service:gpu_plugin", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_module_config", + "//tensorflow/compiler/xla/service:hlo_parser", + "//tensorflow/compiler/xla/service/gpu:gemm_rewriter", + "//tensorflow/compiler/xla/service/gpu:gpu_executable", + "//tensorflow/compiler/xla/tests:filecheck", + 
"//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:llvm_irgen_test_base", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/stream_executor/lib", + "@com_google_absl//absl/memory", + ], +) + +tf_cc_test( + name = "reduction_dimension_grouper_test", + srcs = [ + "reduction_dimension_grouper_test.cc", + ], + tags = tf_cuda_tests_tags(), + deps = [ + ":gpu_codegen_test", + "//tensorflow/compiler/xla:debug_options_flags", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/service:gpu_plugin", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_module_config", + "//tensorflow/compiler/xla/service:hlo_parser", + "//tensorflow/compiler/xla/service/gpu:gemm_rewriter", + "//tensorflow/compiler/xla/service/gpu:gpu_executable", + "//tensorflow/compiler/xla/tests:filecheck", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:llvm_irgen_test_base", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/stream_executor/lib", + "@com_google_absl//absl/memory", + ], +) + tf_cc_test( name = "gpu_copy_test", srcs = ["gpu_copy_test.cc"], diff --git a/tensorflow/compiler/xla/service/gpu/tests/gpu_kernel_tiling_test.cc b/tensorflow/compiler/xla/service/gpu/tests/gpu_kernel_tiling_test.cc index 92bb84065a2..ae10fb161d6 100644 --- a/tensorflow/compiler/xla/service/gpu/tests/gpu_kernel_tiling_test.cc +++ b/tensorflow/compiler/xla/service/gpu/tests/gpu_kernel_tiling_test.cc @@ -461,7 +461,7 @@ TEST_F(GpuKernelTilingTest, ColumnReductionWithLayoutChangeTiled) { .ValueOrDie(); CompileAndVerifyIr(std::move(hlo_module), R"( -; CHECK-LABEL: define void @reduce +; CHECK-LABEL: define void @ ; CHECK: atomicrmw fadd float ; CHECK: } )", diff --git a/tensorflow/compiler/xla/service/gpu/tests/reduction_degenerate_dim_remover_test.cc b/tensorflow/compiler/xla/service/gpu/tests/reduction_degenerate_dim_remover_test.cc new file mode 100644 index 00000000000..686092706f7 --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/tests/reduction_degenerate_dim_remover_test.cc @@ -0,0 +1,72 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include + +#include "tensorflow/compiler/xla/service/gpu/gpu_executable.h" +#include "tensorflow/compiler/xla/service/gpu/tests/gpu_codegen_test.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module_config.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/tests/filecheck.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/stream_executor/lib/statusor.h" + +namespace xla { +namespace gpu { + +namespace { + +class ReductionDegenerateDimRemoverTest : public GpuCodegenTest { + DebugOptions GetDebugOptionsForTest() override { + DebugOptions debug_options = GpuCodegenTest::GetDebugOptionsForTest(); + debug_options.add_xla_disable_hlo_passes("reduction-layout-normalizer"); + debug_options.add_xla_disable_hlo_passes("reduction-dimension-grouper"); + return debug_options; + } +}; + +TEST_F(ReductionDegenerateDimRemoverTest, ReductionWithDegenerateDimensions) { + const char* hlo_text = R"( +HloModule ReduceWithDegenerateDimensions + +add { + accum = f32[] parameter(0) + op = f32[] parameter(1) + ROOT out = f32[] add(accum, op) +} + +ENTRY main { + input = f32[1,3,1,4,1,5,1] parameter(0) + zero = f32[] constant(0) + + ROOT out = f32[1,1,1,1] reduce(input, zero), dimensions={1,3,5}, to_apply=add +} + +)"; + + EXPECT_TRUE(RunAndCompare(hlo_text, ErrorSpec{1e-5, 1e-5})); + MatchOptimizedHloWithShapes(hlo_text, + R"( +// CHECK: f32[] reduce(f32[3,4,5]{2,1,0} {{.+}}, f32[] {{.+}}), dimensions={0,1,2}, to_apply=%add + )"); +} + +} // namespace +} // namespace gpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/tests/reduction_dimension_grouper_test.cc b/tensorflow/compiler/xla/service/gpu/tests/reduction_dimension_grouper_test.cc new file mode 100644 index 00000000000..a9e0b9b5c5f --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/tests/reduction_dimension_grouper_test.cc @@ -0,0 +1,70 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include + +#include "tensorflow/compiler/xla/service/gpu/gpu_executable.h" +#include "tensorflow/compiler/xla/service/gpu/tests/gpu_codegen_test.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" +#include "tensorflow/compiler/xla/tests/filecheck.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace gpu { + +namespace { + +class ReductionDimensionGrouperTest : public GpuCodegenTest { + DebugOptions GetDebugOptionsForTest() override { + DebugOptions debug_options = GpuCodegenTest::GetDebugOptionsForTest(); + debug_options.add_xla_disable_hlo_passes("reduction-layout-normalizer"); + debug_options.add_xla_disable_hlo_passes("layout-assignment"); + return debug_options; + } +}; + +TEST_F(ReductionDimensionGrouperTest, ReductionWithGrouping) { + const char* hlo_text = R"( +HloModule ReductionWithGrouping + +add { + accum = f32[] parameter(0) + op = f32[] parameter(1) + ROOT out = f32[] add(accum, op) +} + +ENTRY main { + input = f32[100,10,32,3]{3,2,1,0} parameter(0) + zero = f32[] constant(0) + + ROOT out = f32[100,10]{0,1} reduce(input, zero), dimensions={2,3}, to_apply=add +} + + +)"; + + EXPECT_TRUE(RunAndCompare(hlo_text, ErrorSpec{1e-5, 1e-5})); + MatchOptimizedHloWithShapes(hlo_text, + R"( +// CHECK: f32[100,10]{0,1} reduce(f32[100,10,96]{2,1,0} {{.+}}, f32[] {{.+}}), dimensions={2}, to_apply=%add + )"); +} + +} // namespace +} // namespace gpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/tests/reduction_layout_normalizer_test.cc b/tensorflow/compiler/xla/service/gpu/tests/reduction_layout_normalizer_test.cc new file mode 100644 index 00000000000..49b8bbf1d6b --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/tests/reduction_layout_normalizer_test.cc @@ -0,0 +1,69 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include + +#include "tensorflow/compiler/xla/service/gpu/gpu_executable.h" +#include "tensorflow/compiler/xla/service/gpu/tests/gpu_codegen_test.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" +#include "tensorflow/compiler/xla/tests/filecheck.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace gpu { + +namespace { + +class ReductionLayoutNormalizerTest : public GpuCodegenTest { + DebugOptions GetDebugOptionsForTest() override { + DebugOptions debug_options = GpuCodegenTest::GetDebugOptionsForTest(); + debug_options.add_xla_disable_hlo_passes("reduction-dimension-grouper"); + debug_options.add_xla_disable_hlo_passes("layout-assignment"); + return debug_options; + } +}; + +TEST_F(ReductionLayoutNormalizerTest, LayoutCanonicalizerTest) { + const char* hlo_text = R"( +HloModule ReduceWithLayoutChange + +add { + x0 = f32[] parameter(0) + y0 = f32[] parameter(1) + ROOT add0 = f32[] add(x0, y0) +} + +ENTRY main { + arg0 = f32[4,5,5,16,12,12,3,3]{2,3,5,4,0,7,6,1} parameter(0) + constant0 = f32[] constant(0) + ROOT reduce0 = f32[4,5,16,12,12]{4,3,2,1,0} reduce(arg0, constant0), + dimensions={1,6,7}, to_apply=add +} + +)"; + + EXPECT_TRUE(RunAndCompare(hlo_text, ErrorSpec{1e-5, 1e-5})); + MatchOptimizedHloWithShapes(hlo_text, + R"( +// CHECK: f32[4,12,12,16,5]{2,1,3,4,0} reduce(f32[5,3,3,4,12,12,16,5]{7,6,5,4,3,2,1,0} {{.+}}, f32[] {{.+}}), dimensions={0,1,2}, to_apply=%add + )"); +} + +} // namespace +} // namespace gpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 368a3876f8c..bc099371d08 100755 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -400,6 +400,7 @@ StatusOr> HloInstruction::CreateFromProto( /*replica_groups=*/ std::vector(proto.replica_groups().begin(), proto.replica_groups().end()), + /*constrain_layout=*/proto.constrain_layout(), /*channel_id=*/channel_id); break; } @@ -900,10 +901,11 @@ HloInstruction::CreateReducePrecision(const Shape& shape, /* static */ std::unique_ptr HloInstruction::CreateAllReduce( const Shape& shape, absl::Span operands, HloComputation* reduce_computation, - const std::vector& replica_groups, + const std::vector& replica_groups, bool constrain_layout, const absl::optional& channel_id) { return absl::make_unique( - shape, operands, reduce_computation, replica_groups, channel_id); + shape, operands, reduce_computation, replica_groups, constrain_layout, + channel_id); } /* static */ std::unique_ptr HloInstruction::CreateAllToAll( @@ -1341,7 +1343,8 @@ bool HloInstruction::HasSideEffectNoRecurse() const { case HloOpcode::kTrace: return true; case HloOpcode::kAllReduce: - return channel_id().has_value(); + return channel_id().has_value() || + Cast(this)->constrain_layout(); case HloOpcode::kCustomCall: return Cast(this) ->custom_call_has_side_effect(); diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 5855911650d..238a96e52a0 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -607,7 +607,7 @@ class HloInstruction { static std::unique_ptr CreateAllReduce( const Shape& 
shape, absl::Span operands, HloComputation* reduce_computation, - const std::vector& replica_groups, + const std::vector& replica_groups, bool constrain_layout, const absl::optional& channel_id); // An all-to-all op takes N array operands of the same shape and scatters them diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc index 9448feb7d8a..a150efd8c83 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.cc +++ b/tensorflow/compiler/xla/service/hlo_instructions.cc @@ -553,10 +553,11 @@ bool HloCollectiveInstruction::IdenticalSlowPath( HloAllReduceInstruction::HloAllReduceInstruction( const Shape& shape, absl::Span operands, HloComputation* reduce_computation, - const std::vector& replica_groups, + const std::vector& replica_groups, bool constrain_layout, const absl::optional& channel_id) : HloCollectiveInstruction(HloOpcode::kAllReduce, shape, operands, - replica_groups, channel_id) { + replica_groups, channel_id), + constrain_layout_(constrain_layout) { AppendComputation(reduce_computation); } @@ -569,12 +570,29 @@ bool HloAllReduceInstruction::IsNoop() const { return !channel_id(); } +HloInstructionProto HloAllReduceInstruction::ToProto() const { + HloInstructionProto proto = HloCollectiveInstruction::ToProto(); + proto.set_constrain_layout(constrain_layout_); + return proto; +} + +std::vector HloAllReduceInstruction::ExtraAttributesToStringImpl( + const HloPrintOptions& options) const { + std::vector result = + HloCollectiveInstruction::ExtraAttributesToStringImpl(options); + if (constrain_layout_) { + result.push_back("constrain_layout=true"); + } + return result; +} + bool HloAllReduceInstruction::IdenticalSlowPath( const HloInstruction& other, const std::function& eq_computations) const { const auto& casted_other = static_cast(other); return HloCollectiveInstruction::IdenticalSlowPath(other, eq_computations) && + constrain_layout() == casted_other.constrain_layout() && eq_computations(to_apply(), casted_other.to_apply()); } @@ -583,7 +601,8 @@ HloAllReduceInstruction::CloneWithNewOperandsImpl( const Shape& shape, absl::Span new_operands, HloCloneContext* /*context*/) const { return absl::make_unique( - shape, new_operands, to_apply(), replica_groups(), channel_id()); + shape, new_operands, to_apply(), replica_groups(), constrain_layout(), + channel_id()); } HloAllToAllInstruction::HloAllToAllInstruction( diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h index 8950e6218e3..1863c78e7e1 100755 --- a/tensorflow/compiler/xla/service/hlo_instructions.h +++ b/tensorflow/compiler/xla/service/hlo_instructions.h @@ -336,13 +336,33 @@ class HloAllReduceInstruction : public HloCollectiveInstruction { explicit HloAllReduceInstruction( const Shape& shape, absl::Span operands, HloComputation* reduce_computation, - const std::vector& replica_groups, + const std::vector& replica_groups, bool constrain_layout, const absl::optional& channel_id); // Returns true if the AllReduce does no communication, so it's equivalent // to a mem copy. bool IsNoop() const; + // Returns true if the layout of the AllReduce is enforced by XLA client (as + // the layout set in the shape). The only reason for the client to set the + // layout is to separately compile computations that communicate with + // AllReduce. 
Since this field is only set `true` by the client, the compiler + // only needs to propagate existing values (e.g., Clone, X64Rewriter) or set + // `false` for all other cases. + // + // When this is `true`, there may be communication endpoints outside the + // current compilation unit, so the compiler considers this AllReduce as + // side-effecting to disable compiler transformations. The compiler is free to + // transform unconstrained AllReduces differently across compilation units. + // It is an error for an HloModule to have a mix of constrained and + // unconstrained AllReduce instructions (checked by HloVerifier). + bool constrain_layout() const { return constrain_layout_; } + + protected: + std::vector ExtraAttributesToStringImpl( + const HloPrintOptions& options) const override; + HloInstructionProto ToProto() const override; + private: bool IdenticalSlowPath( const HloInstruction& other, @@ -353,6 +373,8 @@ class HloAllReduceInstruction : public HloCollectiveInstruction { std::unique_ptr CloneWithNewOperandsImpl( const Shape& shape, absl::Span new_operands, HloCloneContext* context) const override; + + bool constrain_layout_; }; class HloAllToAllInstruction : public HloCollectiveInstruction { diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index ef58b37b469..3ecd0af3480 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -857,11 +857,14 @@ bool HloParserImpl::ParseInstructionRhs(HloComputation::Builder* builder, optional to_apply; optional> replica_group_ids; optional channel_id; + optional constrain_layout; attrs["to_apply"] = {/*required=*/true, AttrTy::kHloComputation, &to_apply}; attrs["replica_groups"] = {/*required=*/false, AttrTy::kBracedInt64ListList, &tmp_groups}; attrs["channel_id"] = {/*required=*/false, AttrTy::kInt64, &channel_id}; + attrs["constrain_layout"] = {/*required=*/false, AttrTy::kBool, + &constrain_layout}; if (!ParseOperands(&operands) || !ParseAttributes(attrs)) { return false; } @@ -870,7 +873,8 @@ bool HloParserImpl::ParseInstructionRhs(HloComputation::Builder* builder, replica_groups = CreateReplicaGroups(*tmp_groups); } instruction = builder->AddInstruction(HloInstruction::CreateAllReduce( - shape, operands, *to_apply, replica_groups, channel_id)); + shape, operands, *to_apply, replica_groups, + constrain_layout ? 
*constrain_layout : false, channel_id)); break; } case HloOpcode::kAllToAll: { diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc index a522b1ddbfe..29a6a5e4297 100644 --- a/tensorflow/compiler/xla/service/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc @@ -1472,6 +1472,24 @@ ENTRY AllReduceWithSubgroups { ROOT all-reduce = f32[128,32]{0,1} all-reduce(input), replica_groups={{0,1},{2,3}}, to_apply=add } +)" +}, +// all-reduce with constrained layout +{ +"AllReduceWithLayout", +R"(HloModule CRS + +add { + lhs = f32[] parameter(0) + rhs = f32[] parameter(1) + ROOT add = f32[] add(lhs, rhs) +} + +ENTRY CRS { + input = f32[8]{0} parameter(0) + ROOT crs = f32[8]{0} all-reduce(input), replica_groups={}, constrain_layout=true, to_apply=add +} + )" }, // all-reduce with all-reduce-id diff --git a/tensorflow/compiler/xla/service/hlo_query.cc b/tensorflow/compiler/xla/service/hlo_query.cc index f968a4a9445..defd6abd8f6 100644 --- a/tensorflow/compiler/xla/service/hlo_query.cc +++ b/tensorflow/compiler/xla/service/hlo_query.cc @@ -16,6 +16,8 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_query.h" #include "tensorflow/compiler/xla/literal.h" +#include "tensorflow/compiler/xla/service/hlo_casting_utils.h" +#include "tensorflow/compiler/xla/service/hlo_instructions.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/shape_util.h" @@ -119,5 +121,17 @@ bool ContainsInstrWithOpcode(const HloComputation* comp, return false; } +bool ContainsLayoutConstrainedAllReduce(const HloModule& module) { + for (auto computation : module.computations()) { + for (auto hlo : computation->instructions()) { + if (hlo->opcode() == HloOpcode::kAllReduce && + DynCast(hlo)->constrain_layout()) { + return true; + } + } + } + return false; +} + } // namespace hlo_query } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_query.h b/tensorflow/compiler/xla/service/hlo_query.h index 215051f8834..0ea36ae83f8 100644 --- a/tensorflow/compiler/xla/service/hlo_query.h +++ b/tensorflow/compiler/xla/service/hlo_query.h @@ -19,6 +19,7 @@ limitations under the License. #include "absl/container/flat_hash_set.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" namespace xla { @@ -72,6 +73,10 @@ bool MatchBinaryInstructionOperandOpcode(HloOpcode opcode, HloInstruction** matching_operand, HloInstruction** other_operand); +// Returns whether the module contains all-reduce instructions with constrained +// layout. +bool ContainsLayoutConstrainedAllReduce(const HloModule& module); + } // namespace hlo_query } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index 4d460ee30ca..1218f7dfc6f 100755 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -1310,6 +1310,29 @@ Status VerifyAsynchronousCopies(const HloModule& module) { return Status::OK(); } +// Checks that AllReduce instructions in the module are either all layout +// constrained or all unconstrained. 
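+// This enforces the invariant documented on
+// HloAllReduceInstruction::constrain_layout() in hlo_instructions.h.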
+Status VerifyLayoutConstrainedAllReduce(const HloModule& module) { + const HloAllReduceInstruction* reference = nullptr; + for (const HloComputation* computation : module.computations()) { + for (const HloInstruction* instruction : computation->instructions()) { + if (instruction->opcode() != HloOpcode::kAllReduce) { + continue; + } + auto all_reduce = DynCast(instruction); + if (!reference) { + reference = all_reduce; + } + if (reference->constrain_layout() != all_reduce->constrain_layout()) { + return FailedPrecondition( + "HloModule has a mix of layout constrained and unconstrained " + "AllReduce instructions."); + } + } + } + return Status::OK(); +} + // Checks various invariants of send and recv instructions. Status VerifySendsAndRecvs(const HloModule& module) { absl::flat_hash_map host_channels; @@ -1697,6 +1720,7 @@ StatusOr HloVerifier::Run(HloModule* module) { })); TF_RETURN_IF_ERROR(module->dynamic_parameter_binding().Verify(*module)); + TF_RETURN_IF_ERROR(VerifyLayoutConstrainedAllReduce(*module)); return false; } diff --git a/tensorflow/compiler/xla/service/hlo_verifier_test.cc b/tensorflow/compiler/xla/service/hlo_verifier_test.cc index df603102157..1b273909991 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier_test.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier_test.cc @@ -988,5 +988,30 @@ TEST_F(HloVerifierTest, FusionShapeVerifier) { HasSubstr("Fused computation shape")); } +TEST_F(HloVerifierTest, AllReduceVerifier) { + const char* const kModuleStr = R"( + HloModule test + + add { + lhs = f32[] parameter(0) + rhs = f32[] parameter(1) + ROOT add = f32[] add(lhs, rhs) + } + + ENTRY entry { + input = f32[8,12]{0,1} parameter(0) + crs0 = f32[8,12]{0,1} all-reduce(input), replica_groups={}, to_apply=add + crs1 = f32[8,12]{0,1} all-reduce(input), replica_groups={}, to_apply=add, + constrain_layout=true + ROOT result = (f32[8,12]{0,1}, f32[8,12]{0,1}) tuple(crs0, crs1) + } + )"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnUnverifiedModule(kModuleStr)); + EXPECT_THAT( + verifier().Run(module.get()).status().error_message(), + HasSubstr("mix of layout constrained and unconstrained AllReduce")); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc index 81a42de6816..defaf4cd7ab 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/layout_assignment.cc @@ -432,6 +432,12 @@ bool IsLayoutConstrainedCustomCall(HloInstruction* instruction) { return custom_call != nullptr && custom_call->layout_constrained(); } +bool IsLayoutConstrainedAllReduce(HloInstruction* instruction) { + const HloAllReduceInstruction* all_reduce = + DynCast(instruction); + return all_reduce != nullptr && all_reduce->constrain_layout(); +} + } // namespace Status LayoutAssignment::AddMandatoryConstraints( @@ -516,6 +522,9 @@ Status LayoutAssignment::AddMandatoryConstraints( TF_RETURN_IF_ERROR( constraints->SetBufferLayout(new_shape.layout(), *buffer)); } + } else if (IsLayoutConstrainedAllReduce(instruction)) { + TF_RETURN_IF_ERROR( + constraints->SetInstructionLayout(instruction->shape(), instruction)); } else if (instruction->IsCrossModuleAllReduce()) { CHECK(get_channel_constraints(instruction)) << "Multi-module layout assignment requires ChannelLayoutConstraints"; @@ -1765,7 +1774,8 @@ Status LayoutAssignment::ClearComputationLayouts(HloComputation* computation) { } // Some instructions carry mandatory layouts in their shape. 
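+    // (infeed, layout-constrained custom calls, and layout-constrained
+    // all-reduces); those layouts must not be cleared here.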
if (instruction->opcode() != HloOpcode::kInfeed && - !IsLayoutConstrainedCustomCall(instruction)) { + !IsLayoutConstrainedCustomCall(instruction) && + !IsLayoutConstrainedAllReduce(instruction)) { LayoutUtil::ClearLayout(instruction->mutable_shape()); } } diff --git a/tensorflow/compiler/xla/service/mlir_gpu/BUILD b/tensorflow/compiler/xla/service/mlir_gpu/BUILD index e5b6138257b..f7d0aa6b669 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/BUILD +++ b/tensorflow/compiler/xla/service/mlir_gpu/BUILD @@ -139,6 +139,9 @@ cc_library( hdrs = ["kernel_lowering.h"], deps = [ "//tensorflow/compiler/mlir/xla:hlo", + "//tensorflow/compiler/mlir/xla:hlo_legalize_to_lhlo", + "//tensorflow/compiler/mlir/xla:lhlo", + "//tensorflow/compiler/mlir/xla:lhlo_fuse_linalg", "//tensorflow/compiler/mlir/xla:lhlo_legalize_to_affine", "//tensorflow/compiler/mlir/xla:lhlo_legalize_to_linalg", "//tensorflow/compiler/mlir/xla:xla_dialect_registration", @@ -157,10 +160,12 @@ cc_library( "@local_config_mlir//:Linalg", "@local_config_mlir//:LinalgDialectRegistration", "@local_config_mlir//:LoopDialectRegistration", + "@local_config_mlir//:LoopOps", "@local_config_mlir//:LoopsToGPUPass", "@local_config_mlir//:NVVMDialect", "@local_config_mlir//:Pass", "@local_config_mlir//:StandardDialectRegistration", + "@local_config_mlir//:StandardOps", "@local_config_mlir//:Transforms", ], ) diff --git a/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc b/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc index 7cbbb3ec44e..c749af3a1c3 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc +++ b/tensorflow/compiler/xla/service/mlir_gpu/kernel_lowering.cc @@ -25,32 +25,234 @@ limitations under the License. #include "mlir/Dialect/GPU/Passes.h" // TF:local_config_mlir #include "mlir/Dialect/LLVMIR/LLVMDialect.h" // TF:local_config_mlir #include "mlir/Dialect/LLVMIR/NVVMDialect.h" // TF:local_config_mlir +#include "mlir/Dialect/Linalg/IR/LinalgOps.h" // TF:local_config_mlir #include "mlir/Dialect/Linalg/Passes.h" // TF:local_config_mlir +#include "mlir/Dialect/LoopOps/LoopOps.h" // TF:local_config_mlir +#include "mlir/Dialect/StandardOps/Ops.h" // TF:local_config_mlir #include "mlir/IR/Attributes.h" // TF:local_config_mlir +#include "mlir/IR/BlockAndValueMapping.h" // TF:local_config_mlir +#include "mlir/IR/Builders.h" // TF:local_config_mlir #include "mlir/IR/Function.h" // TF:local_config_mlir #include "mlir/IR/Module.h" // TF:local_config_mlir #include "mlir/IR/OperationSupport.h" // TF:local_config_mlir +#include "mlir/IR/PatternMatch.h" // TF:local_config_mlir +#include "mlir/IR/Region.h" // TF:local_config_mlir #include "mlir/Pass/Pass.h" // TF:local_config_mlir #include "mlir/Pass/PassManager.h" // TF:local_config_mlir #include "mlir/Transforms/DialectConversion.h" // TF:local_config_mlir #include "mlir/Transforms/Passes.h" // TF:local_config_mlir +#include "tensorflow/compiler/mlir/xla/ir/lhlo_ops.h" #include "tensorflow/compiler/mlir/xla/transforms/passes.h" +#include "tensorflow/compiler/mlir/xla/transforms/rewriters.h" #include "tensorflow/compiler/xla/util.h" namespace xla { namespace mlir_gpu { +namespace { + +using ::mlir::xla_lhlo::FusionOp; + +// Following are some small transformations that are required to clean up code +// after lowering from linalg to loops. + +// A simple pass that applies lowering of HLO to LHLO only within Fusion +// operations. This is needed, as FusionOp is not closed from above and hence +// nested pass managers can not be applied. 
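+// The pass therefore walks every FusionOp in the function and applies the
+// HLO-to-LHLO conversion patterns to the operations nested in its region.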
+struct FusionToLhloConverter + : public mlir::FunctionPass { + void runOnFunction() override { + auto& ctx = getContext(); + mlir::OwningRewritePatternList patterns; + mlir::ConversionTarget target(ctx); + target.addLegalDialect<::mlir::xla_lhlo::XlaLhloDialect>(); + ::mlir::xla_hlo::populateHLOToLHLOConversionPattern(&ctx, &patterns); + + getFunction().walk([&](FusionOp op) { + if (failed(applyPartialConversion(op, target, patterns, nullptr))) { + signalPassFailure(); + } + }); + } +}; + +// Replaces a FusionOp by the operations contained in its region. +struct FusionOpRemover : public mlir::FunctionPass { + void runOnFunction() override { + getFunction().walk([&](FusionOp op) { + mlir::OpBuilder builder(op); + // FusionOp has a single region with a single block, so we can just walk + // over it and clone operations to the outside. + mlir::BlockAndValueMapping mapping; + for (auto& nested_op : op.region().front().without_terminator()) { + auto clone = builder.clone(nested_op, mapping); + for (auto pair : + llvm::zip(nested_op.getResults(), clone->getResults())) { + mapping.map(std::get<0>(pair), std::get<1>(pair)); + } + } + op.erase(); + }); + } +}; + +// Rewrite the single-trip loops we get out of linalg into just their bodies. +// TODO(herhut): Make this a general pattern. +struct SingleTripLoopRemoval + : public mlir::FunctionPass { + void runOnFunction() override { + auto getConstantValue = [](mlir::Value* value) -> llvm::Optional { + auto definingOp = value->getDefiningOp(); + if (!definingOp) return llvm::None; + auto constantOp = llvm::dyn_cast(definingOp); + if (!constantOp) return llvm::None; + auto integer = constantOp.getValue().dyn_cast(); + if (!integer) return llvm::None; + return integer.getInt(); + }; + getFunction().walk([&](mlir::loop::ForOp forOp) { + auto lower = getConstantValue(forOp.lowerBound()); + auto upper = getConstantValue(forOp.upperBound()); + auto step = getConstantValue(forOp.step()); + if (!lower || !upper || !step) return; + if ((lower.getValue() < upper.getValue()) && + (lower.getValue() + step.getValue() >= upper.getValue())) { + // This loop has a single trip, so we can move the body in front. + mlir::BlockAndValueMapping mapping; + mlir::OpBuilder b(forOp); + mapping.map(forOp.getInductionVar(), forOp.lowerBound()); + for (auto& nested_op : forOp.getBody()->without_terminator()) { + auto clone = b.clone(nested_op, mapping); + for (auto pair : + llvm::zip(nested_op.getResults(), clone->getResults())) { + mapping.map(std::get<0>(pair), std::get<1>(pair)); + } + } + forOp.erase(); + } + }); + } +}; + +// Simple pass that replaces a load that immediately follows a store to the +// same address with the stored value. This needs generalization. +struct StoreForwardingPass : mlir::FunctionPass { + void runOnFunction() override { + getFunction().walk([&](mlir::LoadOp loadOp) { + auto block = loadOp.getOperation()->getBlock(); + auto iterator = std::find_if(block->rbegin(), block->rend(), + [&loadOp](mlir::Operation& other) { + return &other == loadOp.getOperation(); + }); + if (++iterator == block->rend()) return; + mlir::StoreOp storeOp = llvm::dyn_cast(&*(iterator)); + if (!storeOp) return; + // Check both store to the same value. 
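+      // I.e. forward the value only if the load reads the same memref and the
+      // same indices that the immediately preceding store wrote.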
+ if (storeOp.memref() != loadOp.memref()) return; + auto storeIndices = storeOp.getIndices(); + auto loadIndices = loadOp.getIndices(); + if (!std::equal(storeIndices.begin(), storeIndices.end(), + loadIndices.begin(), loadIndices.end())) { + return; + } + loadOp.replaceAllUsesWith(storeOp.getValueToStore()); + loadOp.erase(); + }); + }; +}; + +// Simple pass that removes temporary buffers that are only written to but +// never read from or that are read but the read value is not used. +// Needs an analysis that proves that loads and stores are side-effect free +// (in bounds, no aliasing, etc.). +struct DeadTempBufferRemoval : mlir::FunctionPass { + bool operationConsideredDead(mlir::Operation* op) { + for (auto result : op->getResults()) { + if (!llvm::all_of(result->getUsers(), [&](mlir::Operation* op) { + // Store and Dealloc is OK. + if (llvm::isa(op) || + llvm::isa(op)) { + return true; + } + // Load without uses is also ok. + if (auto loadOp = llvm::dyn_cast(op)) { + return loadOp.use_empty(); + } + // Subview is ok if it is dead itself. + if (llvm::isa(op)) { + return operationConsideredDead(op); + } + return false; + })) { + return false; + } + } + return true; + } + + void recursiveErase(mlir::Operation* op) { + for (auto result : op->getResults()) { + for (auto user : llvm::make_early_inc_range(result->getUsers())) { + recursiveErase(user); + } + } + op->erase(); + } + + void runOnFunction() override { + getFunction().walk([&](mlir::AllocOp allocOp) { + if (!operationConsideredDead(allocOp)) { + return; + } + + // TODO(herhut): There should be a generic helper for this. + recursiveErase(allocOp); + }); + } +}; + +// Neat little helper pass to dump the IR inbetween passes. +struct DumpPass : public mlir::ModulePass { + void runOnModule() override { +#if DEBUG + getModule().dump(); +#endif + } +}; + +} // namespace Status LowerLHLOToGPU(mlir::ModuleOp module) { mlir::PassManager pm(module.getContext()); - // Transform element-wise operations to LinAlg. + // First, lower bodies of fusion operations from hlo to lhlo. + pm.addPass(absl::make_unique()); + // Next, we can strip the outer fusion operation. + pm.addPass(absl::make_unique()); + // Transform lhlo operations to LinAlg. pm.addPass(::mlir::xla_lhlo::createLegalizeToLinalgPass()); - // Go from affine to normal loops. + // Fuse linalg operations. This will yield a single tiled loop nest where + // the inner loops are single trip. + pm.addPass(::mlir::xla_lhlo::createLhloFuseLinalg()); + pm.addPass(absl::make_unique()); + // Go from linalg to normal loops. pm.addPass(::mlir::linalg::createConvertLinalgToLoopsPass()); - // Lower affine to ordinary loops. - pm.addPass(::mlir::createLowerAffinePass()); - // Move constants out of the loop. - pm.addPass(::mlir::createLoopInvariantCodeMotionPass()); + pm.addPass(absl::make_unique()); + // Canonicalize the code to simplify index computations. + pm.addNestedPass<::mlir::FuncOp>(::mlir::createCanonicalizerPass()); + pm.addPass(absl::make_unique()); + // The innermost loops will be single-trip. + pm.addPass(absl::make_unique()); + pm.addPass(absl::make_unique()); + // Run CSE to ensure that loads and stores to the same subview get + // recognized as such. + pm.addNestedPass<::mlir::FuncOp>(::mlir::createCSEPass()); + pm.addPass(absl::make_unique()); + // Forward stores to buffers to loads. + pm.addPass(absl::make_unique()); + pm.addPass(absl::make_unique()); + // Remove now unused temporary buffers. 
+ pm.addPass(absl::make_unique()); + pm.addPass(absl::make_unique()); // Coalesce generated loops to have 1d loops. pm.addPass(::mlir::createLoopCoalescingPass()); // Transform the now 1d loops to gpu launches. @@ -65,6 +267,7 @@ Status LowerLHLOToGPU(mlir::ModuleOp module) { if (failed(pm.run(module))) { return InternalError("Lowering to GPU kernels failed."); } + return Status::OK(); } @@ -73,7 +276,7 @@ Status LowerKernelBodiesToNVVM(mlir::ModuleOp module) { ::mlir::PassManager pm(module.getContext(), /*verifyPasses=*/false); // Rewrite kernel functions to LLVM IR. - auto &kernelPm = pm.nest<::mlir::ModuleOp>(); + auto& kernelPm = pm.nest<::mlir::ModuleOp>(); kernelPm.addPass(::mlir::createLowerGpuOpsToNVVMOpsPass()); // Some basic cleanup. kernelPm.addNestedPass<::mlir::FuncOp>(::mlir::createCanonicalizerPass()); diff --git a/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.cc b/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.cc index b035a8ddcb5..92f7e5a08ac 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.cc +++ b/tensorflow/compiler/xla/service/mlir_gpu/mlir_compiler.cc @@ -522,6 +522,10 @@ StatusOr> MlirCompiler::RunBackend( auto llvmModule = mlir::translateModuleToNVVMIR(*kernel_module); + if (!llvmModule) { + return InternalError("Translation to LLVM failed"); + } + llvmModule->setModuleIdentifier(emission_context.getHloModule()->name()); // TODO(herhut): Why is this needed and does not come from the template? llvmModule->setDataLayout(gpu::nvptx::kDataLayout); diff --git a/tensorflow/compiler/xla/service/mlir_gpu/tests/mlir_gpu_lhlo_gen_test.cc b/tensorflow/compiler/xla/service/mlir_gpu/tests/mlir_gpu_lhlo_gen_test.cc index 3ad958dfe6d..1d37aa1ba75 100644 --- a/tensorflow/compiler/xla/service/mlir_gpu/tests/mlir_gpu_lhlo_gen_test.cc +++ b/tensorflow/compiler/xla/service/mlir_gpu/tests/mlir_gpu_lhlo_gen_test.cc @@ -113,41 +113,20 @@ ENTRY %Add (x: f32[2,2], y: f32[2,2]) -> f32[2,2] { ;CHECK: "gpu.launch_func"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[ARG0]], %[[ARG1]], %[[ARG2]] ;CHECK: } ;CHECK: func @add_kernel(%[[ARG0]]: [[TYPE]], %[[ARG1]]: [[TYPE]], %[[ARG2]]: [[TYPE]] -;CHECK: load %[[ARG0]][[INDEX:.*]] -;CHECK: load %[[ARG1]][[INDEX]] -;CHECK: store %{{.*}}, %[[ARG2]][[INDEX]] +;CHECK-DAG: std.subview %[[ARG0]]{{\[}}[[INDEX:.*]]] +;CHECK-DAG: std.subview %[[ARG1]]{{\[}}[[INDEX]]] +;CHECK-DAG: std.subview %[[ARG2]]{{\[}}[[INDEX]]] +;CHECK: %[[VAL1:.*]] = load %{{.*\[}}[[INDEX:.*]]] +;CHECK: %[[VAL2:.*]] = load %{{.*\[}}[[INDEX]]] +;CHECK: %[[RES:.*]] = addf %[[VAL1]], %[[VAL2]] +;CHECK: store %[[RES]], %{{.*\[}}[[INDEX]]] )", LoweringStage::GPU); } -TEST_F(LhloGenTest, AddInLVVMDialect) { - CompileAndVerifyIr(R"( -HloModule Add - -ENTRY %Add (x: f32[2,2], y: f32[2,2]) -> f32[2,2] { - %x = f32[2,2]{1,0} parameter(0) - %y = f32[2,2]{1,0} parameter(1) - ROOT %add = f32[2,2]{1,0} add(f32[2,2]{1,0} %x, f32[2,2]{1,0} %y) -})", - R"( -;CHECK: func @add_kernel(%[[ARG0:.*]]: [[TYPE:!llvm<.*]], %[[ARG1:.*]]: [[TYPE]], %[[ARG2:.*]]: [[TYPE]] -;CHECK: %[[LD0:.*]] = llvm.load %[[ARG0]] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }*"> -;CHECK: %[[LD1:.*]] = llvm.load %[[ARG1]] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }*"> -;CHECK: %[[LD2:.*]] = llvm.load %[[ARG2]] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }*"> -;CHECK: %[[PTR0:.*]] = llvm.extractvalue %[[LD0]][1] -;CHECK: %[[GEP0:.*]] = llvm.getelementptr %[[PTR0]] -;CHECK: %[[VAL0:.*]] = llvm.load %[[GEP0]] -;CHECK: %[[PTR1:.*]] = llvm.extractvalue 
%[[LD1]][1] -;CHECK: %[[GEP1:.*]] = llvm.getelementptr %[[PTR1]] -;CHECK: %[[VAL1:.*]] = llvm.load %[[GEP1]] -;CHECK: %[[VAL2:.*]] = llvm.fadd %[[VAL0]], %[[VAL1]] -;CHECK: %[[PTR2:.*]] = llvm.extractvalue %[[LD2]][1] -;CHECK: %[[GEP2:.*]] = llvm.getelementptr %[[PTR2]] -;CHECK: llvm.store %[[VAL2]], %[[GEP2]] - )", - LoweringStage::LLVM); -} - +// This test verifies that the kernel signature is amended correctly. The actual +// body of the generated function does not matter, it is already checked at the +// GPU level above. TEST_F(LhloGenTest, AddAsKernel) { CompileAndVerifyIr(R"( HloModule Add @@ -219,20 +198,6 @@ ENTRY %Add (x: f32[2,2], y: f32[2,2]) -> f32[2,2] { ;CHECK: llvm.store %{{.*}}, %[[GEP2ST0]] ;CHECK: %[[GEP2ST1:.*]] = llvm.getelementptr %[[DESC2]] ;CHECK: llvm.store %{{.*}}, %[[GEP2ST1]] - -;CHECK: %[[VL0:.*]] = llvm.load %[[DESC0]] -;CHECK: %[[VL1:.*]] = llvm.load %[[DESC1]] -;CHECK: %[[VL2:.*]] = llvm.load %[[DESC2]] -;CHECK: %[[EV0:.*]] = llvm.extractvalue %[[VL0]][1] -;CHECK: %[[VGEP0:.*]] = llvm.getelementptr %[[EV0]] -;CHECK: %[[VAL0:.*]] = llvm.load %[[VGEP0]] -;CHECK: %[[EV1:.*]] = llvm.extractvalue %[[VL1]][1] -;CHECK: %[[VGEP1:.*]] = llvm.getelementptr %[[EV1]] -;CHECK: %[[VAL1:.*]] = llvm.load %[[VGEP1]] -;CHECK: %[[VAL2:.*]] = llvm.fadd %[[VAL0]], %[[VAL1]] -;CHECK: %[[EV2:.*]] = llvm.extractvalue %[[VL2]][1] -;CHECK: %[[SGEP:.*]] = llvm.getelementptr %[[EV2]] -;CHECK: llvm.store %[[VAL2]], %[[SGEP]] )", LoweringStage::KERNEL); } @@ -262,6 +227,34 @@ ENTRY %AddMultiply (x: f32[2,2], y: f32[2,2], z: f32[2,2]) -> f32[2,2] { )"); } +TEST_F(LhloGenTest, AddMultiplyGPU) { + CompileAndVerifyIr(R"( +HloModule AddMultiply + +ENTRY %AddMultiply (x: f32[2,2], y: f32[2,2], z: f32[2,2]) -> f32[2,2] { + %x = f32[2,2]{1,0} parameter(0) + %y = f32[2,2]{1,0} parameter(1) + %z = f32[2,2]{1,0} parameter(2) + %add = f32[2,2]{1,0} add(f32[2,2]{1,0} %x, f32[2,2]{1,0} %y) + ROOT %mul = f32[2,2]{1,0} multiply(f32[2,2]{1,0} %add, f32[2,2]{1,0} %z) +})", + R"( +;CHECK: func @fusion_kernel(%[[ARG0:.*]]: [[TYPE:.*]], %[[ARG1:.*]]: [[TYPE]], %[[ARG2:.*]]: [[TYPE]], %[[RESULT:.*]]: [[TYPE]]) +;CHECK-DAG: std.subview %[[ARG0]]{{\[}}[[INDEX:.*]]] +;CHECK-DAG: std.subview %[[ARG1]]{{\[}}[[INDEX]]] +;CHECK-DAG: std.subview %[[ARG2]]{{\[}}[[INDEX]]] +;CHECK-DAG: std.subview %[[RESULT]]{{\[}}[[INDEX]]] +;CHECK: %[[V0:.*]] = load %{{.*\[}}[[CSTIDX:.*]]] +;CHECK: %[[V1:.*]] = load %{{.*\[}}[[CSTIDX:.*]]] +;CHECK: %[[ADD:.*]] = addf %[[V0]], %[[V1]] +;CHECK: %[[V2:.*]] = load %{{.*\[}}[[CSTIDX:.*]]] +;CHECK: %[[MUL:.*]] = mulf %[[ADD]], %[[V2]] +;CHECK: store %[[MUL]], %{{.*\[}}[[CSTIDX:.*]]] +;CHECK-NEXT: return + )", + LoweringStage::GPU); +} + TEST_F(LhloGenTest, FusedReduce) { CompileAndVerifyIr(R"( HloModule FusedReduce @@ -275,12 +268,14 @@ HloModule FusedReduce %fused_computation (param: f32[100,10]) -> f32[10] { %param = f32[100,10] parameter(0) %constant = f32[] constant(0) - ROOT %reduce = f32[10]{0} reduce(f32[100,10]{1,0} %param, f32[] %constant), dimensions={0}, to_apply=%add + ROOT %reduce = f32[10]{0} reduce(f32[100,10]{1,0} %param, f32[] %constant), + dimensions={0}, to_apply=%add } ENTRY %FusedReduce (x: f32[100,10]) -> f32[10] { %x = f32[100,10] parameter(0) - ROOT %fusion = f32[10]{0} fusion(f32[100,10]{1,0} %x), kind=kInput, calls=%fused_computation + ROOT %fusion = f32[10]{0} fusion(f32[100,10]{1,0} %x), kind=kInput, + calls=%fused_computation } )", R"( @@ -316,21 +311,20 @@ ENTRY %Broadcast (x: f32[10]) -> f32[10, 5] { )"); } -// TODO(pifon): Re-enable when Iota can be lowered 
all the way to GPU. -// TEST_F(LhloGenTest, Iota) { -// CompileAndVerifyIr(R"( -// HloModule Iota -// -// ENTRY %Iota() -> s64[10, 5] { -// ROOT %iota = s64[10, 5]{1,0} iota(), iota_dimension=0 -// })", -// R"( -// ;CHECK: func @iota(%[[OUT:.*]]: [[OUT_T:.*]]) { -// ;CHECK: "xla_lhlo.iota"(%[[OUT]]) -// ;CHECK: {iota_dimension = 0 : i64} : ([[OUT_T]]) -> () -// ;CHECK: } -// )"); -// } +TEST_F(LhloGenTest, Iota) { + CompileAndVerifyIr(R"( + HloModule Iota + + ENTRY %Iota() -> s64[10, 5] { + ROOT %iota = s64[10, 5]{1,0} iota(), iota_dimension=0 +})", + R"( +;CHECK: func @iota(%[[OUT:.*]]: [[OUT_T:.*]]) { +;CHECK: "xla_lhlo.iota"(%[[OUT]]) +;CHECK: {iota_dimension = 0 : i64} : ([[OUT_T]]) -> () +;CHECK: } +)"); +} TEST_F(LhloGenTest, AddReduce) { CompileAndVerifyIr(R"( diff --git a/tensorflow/compiler/xla/service/multi_output_fusion.cc b/tensorflow/compiler/xla/service/multi_output_fusion.cc index 74f2c95102a..07b6fb5bf85 100644 --- a/tensorflow/compiler/xla/service/multi_output_fusion.cc +++ b/tensorflow/compiler/xla/service/multi_output_fusion.cc @@ -108,6 +108,11 @@ StatusOr MultiOutputFusion::Run(HloModule* module) { changed = true; } } + // Clean up state in case this pass is wrapped in an HloPassPipeline. + candidates_.clear(); + candidates_index_.clear(); + all_fusion_candidates_.clear(); + reachability_.reset(); return changed; } diff --git a/tensorflow/compiler/xla/tests/llvm_irgen_test_base.cc b/tensorflow/compiler/xla/tests/llvm_irgen_test_base.cc index 8b95c17d199..c2dc9125479 100644 --- a/tensorflow/compiler/xla/tests/llvm_irgen_test_base.cc +++ b/tensorflow/compiler/xla/tests/llvm_irgen_test_base.cc @@ -98,9 +98,9 @@ void LlvmIrGenTestBase::MatchOptimizedHlo(absl::string_view hlo, StatusOr> LlvmIrGenTestBase::GetOptimizedModule( absl::string_view hlo) { - HloModuleConfig config; - TF_ASSIGN_OR_RETURN(std::unique_ptr module, - ParseAndReturnVerifiedModule(hlo, config)); + TF_ASSIGN_OR_RETURN( + std::unique_ptr module, + ParseAndReturnVerifiedModule(hlo, GetModuleConfigForTest())); return backend().compiler()->RunHloPasses( std::move(module), backend().default_stream_executor(), backend().default_stream_executor()->GetAllocator()); diff --git a/tensorflow/compiler/xla/tools/BUILD b/tensorflow/compiler/xla/tools/BUILD index da20d28ea81..8e6e9b46100 100644 --- a/tensorflow/compiler/xla/tools/BUILD +++ b/tensorflow/compiler/xla/tools/BUILD @@ -252,3 +252,114 @@ sh_test( srcs = ["interactive_graphviz_test.sh"], data = [":interactive_graphviz"], ) + +cc_library( + name = "hlo_module_loader", + srcs = ["hlo_module_loader.cc"], + hdrs = ["hlo_module_loader.h"], + deps = [ + "//tensorflow/compiler/xla:debug_options_flags", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_parser", + "//tensorflow/core:lib", + "//tensorflow/core:regexp_internal", + "@com_google_absl//absl/strings", + "@com_google_protobuf//:protobuf_headers", + ], +) + +tf_cc_test( + name = "hlo_module_loader_test", + srcs = ["hlo_module_loader_test.cc"], + deps = [ + ":hlo_module_loader", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", # fixdeps: keep + "//tensorflow/core:test", + ], +) + +cc_library( + name = "prepare_reference_module", + srcs = ["prepare_reference_module.cc"], + hdrs = ["prepare_reference_module.h"], + deps = [ + "//tensorflow/compiler/xla:debug_options_flags", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:xla_proto_cc", + 
"//tensorflow/compiler/xla/service:despecializer", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_module_config", + "//tensorflow/core/platform:errors", + "//tensorflow/stream_executor:platform", + "//tensorflow/stream_executor/lib", + ], +) + +cc_library( + name = "run_hlo_module_lib", + testonly = True, + srcs = ["run_hlo_module.cc"], + hdrs = ["run_hlo_module.h"], + deps = [ + ":hlo_module_loader", + ":prepare_reference_module", + "//tensorflow/compiler/xla:debug_options_flags", + "//tensorflow/compiler/xla:error_spec", + "//tensorflow/compiler/xla:util", + "//tensorflow/compiler/xla/client/lib:testing", + "//tensorflow/compiler/xla/service:hlo", + "//tensorflow/compiler/xla/service:hlo_runner", + "//tensorflow/compiler/xla/service:hlo_verifier", + "//tensorflow/compiler/xla/service:platform_util", + "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/compiler/xla/tests:test_utils", + "//tensorflow/core/platform:logging", + "//tensorflow/core/platform:status", + "//tensorflow/core/platform:test", + "//tensorflow/stream_executor:platform", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:span", + ], +) + +tf_cc_binary( + name = "run_hlo_module", + testonly = True, + srcs = ["run_hlo_module_main.cc"], + deps = [ + ":run_hlo_module_lib", + "//tensorflow/compiler/xla:debug_options_flags", + "//tensorflow/compiler/xla/service:cpu_plugin", + "//tensorflow/compiler/xla/service:gpu_plugin", + "//tensorflow/compiler/xla/service:interpreter_plugin", + "//tensorflow/core:framework_internal", + "//tensorflow/core/platform:logging", + "//tensorflow/core/platform:platform_port", + "//tensorflow/core/platform:status", + "//tensorflow/core/platform:test", + "@com_google_absl//absl/strings", + ], +) + +# Same as run_hlo_module, but supports the MLIR GPU backend instead of the XLA +# GPU backend. +tf_cc_binary( + name = "run_hlo_module_mlir_gpu", + testonly = True, + srcs = ["run_hlo_module_main.cc"], + deps = [ + ":run_hlo_module_lib", + "//tensorflow/compiler/xla:debug_options_flags", + "//tensorflow/compiler/xla/service:cpu_plugin", + "//tensorflow/compiler/xla/service:interpreter_plugin", + "//tensorflow/compiler/xla/service:mlir_gpu_plugin", + "//tensorflow/core:framework_internal", + "//tensorflow/core/platform:logging", + "//tensorflow/core/platform:platform_port", + "//tensorflow/core/platform:status", + "//tensorflow/core/platform:test", + "@com_google_absl//absl/strings", + ], +) diff --git a/tensorflow/compiler/xla/tools/hlo_module_loader.cc b/tensorflow/compiler/xla/tools/hlo_module_loader.cc new file mode 100644 index 00000000000..8eb170b25e5 --- /dev/null +++ b/tensorflow/compiler/xla/tools/hlo_module_loader.cc @@ -0,0 +1,125 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Emits an HLO module in a text form suitable for diffing. 
+ +#include "tensorflow/compiler/xla/tools/hlo_module_loader.h" + +#include +#include +#include + +#include "google/protobuf/text_format.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "absl/strings/str_split.h" +#include "tensorflow/compiler/xla/debug_options_flags.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" +#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/regexp.h" + +namespace xla { +namespace { + +Status OverrideConfig(const hlo_module_loader_details::Config& ovr_config, + HloModuleConfig* config) { + config->set_replica_count(ovr_config.num_replicas); + return Status::OK(); +} + +} // namespace + +string StripLogHeaders(const string& hlo_string) { + // I0521 12:04:45.883483 1509 service.cc:186] ... + static RE2* matcher = new RE2( + "[IWEF]\\d{4} " + "\\d{2}:\\d{2}:\\d{2}\\.\\d+\\s+\\d+\\s+[^:]+:\\d+\\]\\s?(.*)"); + absl::string_view matches[4]; + std::vector lines = absl::StrSplit(hlo_string, '\n'); + for (auto& line : lines) { + if (matcher->Match(line, 0, line.size(), RE2::ANCHOR_START, matches, 4)) { + line = string(matches[1]); + } + } + return absl::StrJoin(lines, "\n", [](string* out, const string& line) { + absl::StrAppend(out, line); + }); +} + +StatusOr> LoadModuleFromData( + const string& data, const string& format, + hlo_module_loader_details::Config ovr_config, + const std::function& config_modifier_hook) { + DebugOptions debug_options = GetDebugOptionsFromFlags(); + std::unique_ptr module; + if (format == "hlo" || format == "txt") { + string hlo_string = StripLogHeaders(data); + HloModuleConfig config; + config.set_debug_options(debug_options); + TF_RETURN_IF_ERROR(OverrideConfig(ovr_config, &config)); + if (config_modifier_hook) { + config_modifier_hook(&config); + } + TF_ASSIGN_OR_RETURN(module, + ParseAndReturnUnverifiedModule(hlo_string, config)); + } else { + HloSnapshot proto; + if (format == "pb") { + if (!proto.ParseFromString(data) && + !proto.mutable_hlo()->ParseFromString(data)) { + return InvalidArgument("Failed to parse input as HLO protobuf binary"); + } + } else if (format == "pbtxt") { + if (!proto2::TextFormat::ParseFromString(data, &proto) && + !proto2::TextFormat::ParseFromString(data, proto.mutable_hlo())) { + return InvalidArgument("Failed to parse input as HLO protobuf text"); + } + } else { + return InvalidArgument( + "Invalid format from file extension: '%s'. 
Expected: hlo, txt, pb, " + "or pbtxt", + format); + } + TF_ASSIGN_OR_RETURN(HloModuleConfig config, + HloModule::CreateModuleConfigFromProto( + proto.hlo().hlo_module(), debug_options)); + TF_RETURN_IF_ERROR(OverrideConfig(ovr_config, &config)); + if (config_modifier_hook) { + config_modifier_hook(&config); + } + TF_ASSIGN_OR_RETURN( + module, HloModule::CreateFromProto(proto.hlo().hlo_module(), config)); + } + return std::move(module); +} + +StatusOr> LoadModuleFromFile( + const string& path, hlo_module_loader_details::Config ovr_config, + string format, + const std::function& config_modifier_hook) { + string data; + if (format.empty()) { + format = string(tensorflow::io::Extension(path)); + } + TF_RETURN_IF_ERROR( + tensorflow::ReadFileToString(tensorflow::Env::Default(), path, &data)); + return LoadModuleFromData(data, format, ovr_config, config_modifier_hook); +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/tools/hlo_module_loader.h b/tensorflow/compiler/xla/tools/hlo_module_loader.h new file mode 100644 index 00000000000..8e174cef08f --- /dev/null +++ b/tensorflow/compiler/xla/tools/hlo_module_loader.h @@ -0,0 +1,79 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_TOOLS_HLO_MODULE_LOADER_H_ +#define TENSORFLOW_COMPILER_XLA_TOOLS_HLO_MODULE_LOADER_H_ + +#include +#include + +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/statusor.h" + +namespace xla { +namespace hlo_module_loader_details { + +struct Config { + Config() {} + int64 num_replicas = 1; +}; + +} // namespace hlo_module_loader_details + +// Given a string composed by multiple lines, strip the log headers, if present +// at the beginning of each line. +string StripLogHeaders(const string& hlo_string); + +// Loads an HLO module from a string. +// The data can have the followings formats: +// 1) A binary of text proto file, the proto should be in xla.HloProto type. It +// can be a binary proto (format must be "pb"), or a text proto (format must +// be "pbtxt"). +// 2) A hlo text dump, the string should be in HloModule::ToString() format +// (format must be "txt" or "hlo"). The input data can also contain log +// headers, which will be stripped. +// The ovr_config data can be used to override certain fields of the +// HloModuleConfig. +// The HloModuleConfig is passed to config_modifier_hook for custom +// modifications before use. +StatusOr> LoadModuleFromData( + const string& data, const string& format, + hlo_module_loader_details::Config ovr_config = + hlo_module_loader_details::Config(), + const std::function& config_modifier_hook = {}); + +// Loads an HLO module from file. +// The file can be one of the followings: +// 1) A binary of text proto file, the proto should be in xla.HloProto type. It +// can be a binary proto (with .pb extension), or a text proto (with a .pbtxt +// extension). 
+// 2) A hlo text dump, the string should be in HloModule::ToString() format +// (with a .hlo or .txt extension). A text file can also contain log headers, +// which will be stripped. +// If the format is specified (not empty), it overrides the one guessed from the +// file extension. The ovr_config data can be used to override certain fields of +// the HloModuleConfig. +// The HloModuleConfig is passed to config_modifier_hook for custom +// modifications before use. +StatusOr> LoadModuleFromFile( + const string& path, + hlo_module_loader_details::Config ovr_config = + hlo_module_loader_details::Config(), + string format = "", + const std::function& config_modifier_hook = {}); + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_TOOLS_HLO_MODULE_LOADER_H_ diff --git a/tensorflow/compiler/xla/tools/hlo_module_loader_test.cc b/tensorflow/compiler/xla/tools/hlo_module_loader_test.cc new file mode 100644 index 00000000000..e88d03e6b33 --- /dev/null +++ b/tensorflow/compiler/xla/tools/hlo_module_loader_test.cc @@ -0,0 +1,48 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/tools/hlo_module_loader.h" + +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace { + +class HloModuleLoaderTest : public HloTestBase {}; + +TEST_F(HloModuleLoaderTest, StripsLogHeaders) { + const string& hlo_string = R"( +I0521 12:04:45.883483 1509 service.cc:186] HloModule test_log_stripping +I0521 12:04:45.883483 1509 service.cc:186] +I0521 12:04:45.883483 1509 service.cc:186] ENTRY entry { +I0521 12:04:45.883483 1509 service.cc:186] p0 = f32[4]{0} parameter(0) +I0521 12:04:45.883483 1509 service.cc:186] p1 = f32[4]{0} parameter(1) +I0521 12:04:45.883483 1509 service.cc:186] add = f32[4]{0} add(p0, p1) +I0521 12:04:45.883483 1509 service.cc:186] ROOT rooty = (f32[4]{0}, f32[4]{0}) tuple(p1, add) +I0521 12:04:45.883483 1509 service.cc:186] } +)"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr hlo_module, + LoadModuleFromData(hlo_string, "txt")); + EXPECT_NE(FindInstruction(hlo_module.get(), "p0"), nullptr); + EXPECT_NE(FindInstruction(hlo_module.get(), "p1"), nullptr); + EXPECT_NE(FindInstruction(hlo_module.get(), "add"), nullptr); + EXPECT_NE(FindInstruction(hlo_module.get(), "rooty"), nullptr); +} + +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/tools/prepare_reference_module.cc b/tensorflow/compiler/xla/tools/prepare_reference_module.cc new file mode 100644 index 00000000000..65489c2d5db --- /dev/null +++ b/tensorflow/compiler/xla/tools/prepare_reference_module.cc @@ -0,0 +1,61 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/tools/prepare_reference_module.h" + +#include +#include + +#include "tensorflow/compiler/xla/debug_options_flags.h" +#include "tensorflow/compiler/xla/service/despecializer.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_module_config.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/xla.pb.h" +#include "tensorflow/core/platform/errors.h" +#include "tensorflow/stream_executor/lib/status.h" +#include "tensorflow/stream_executor/platform.h" + +namespace xla { + +StatusOr> PrepareReferenceModule( + const HloModule& test_module, + const ::stream_executor::Platform::Id& test_platform_id, + const std::function& config_modifier_hook, + const std::function& module_modifier_hook) { + DebugOptions debug_options = GetDebugOptionsFromFlags(); + // The combination of fast math and optimizations leads to unsound code + // transformations (see third_party/tensorflow/compiler/xla/xla.proto for + // details). The test platform should not change this from the default. + debug_options.set_xla_cpu_enable_fast_math(false); + debug_options.set_xla_gpu_enable_fast_min_max(false); + HloModuleConfig reference_config = test_module.config(); + reference_config.set_debug_options(debug_options); + if (config_modifier_hook) { + config_modifier_hook(&reference_config); + } + std::unique_ptr reference_module = + test_module.Clone(reference_config, "reference"); + if (module_modifier_hook) { + TF_RETURN_IF_ERROR(module_modifier_hook(test_module, test_platform_id, + reference_module.get())); + } else { + TF_RETURN_IF_ERROR(Despecializer().Run(reference_module.get()).status()); + } + return std::move(reference_module); +} +}; // namespace xla diff --git a/tensorflow/compiler/xla/tools/prepare_reference_module.h b/tensorflow/compiler/xla/tools/prepare_reference_module.h new file mode 100644 index 00000000000..f98e50fc1e8 --- /dev/null +++ b/tensorflow/compiler/xla/tools/prepare_reference_module.h @@ -0,0 +1,45 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_TOOLS_PREPARE_REFERENCE_MODULE_H_ +#define TENSORFLOW_COMPILER_XLA_TOOLS_PREPARE_REFERENCE_MODULE_H_ + +#include +#include + +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_module_config.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/stream_executor/lib/status.h" +#include "tensorflow/stream_executor/platform.h" + +namespace xla { + +// A helper function that takes a HloModule, derives a HloModuleConfig from it +// which disables fast-math und sets the DebugOptions from flags, then runs the +// deoptimization pipeline (or calls 'module_modifier_hook' if provided). This +// is meant to produce a reference module that is comparable to our custom test +// platforms. +StatusOr> PrepareReferenceModule( + const HloModule& test_module, + const ::stream_executor::Platform::Id& test_platform_id, + const std::function& config_modifier_hook = {}, + const std::function& module_modifier_hook = {}); + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_TOOLS_PREPARE_REFERENCE_MODULE_H_ diff --git a/tensorflow/compiler/xla/tools/replay_computation.cc b/tensorflow/compiler/xla/tools/replay_computation.cc index 67a2c26201a..095655085e5 100644 --- a/tensorflow/compiler/xla/tools/replay_computation.cc +++ b/tensorflow/compiler/xla/tools/replay_computation.cc @@ -346,10 +346,10 @@ StatusOr> ParseRecordIoFile(absl::string_view filename, std::vector snapshots; uint64 offset = 0; - string record; + tensorflow::tstring record; while (reader.ReadRecord(&offset, &record).ok()) { HloSnapshot snapshot; - if (snapshot.mutable_hlo()->ParseFromString(record)) { + if (snapshot.mutable_hlo()->ParseFromStringPiece(record)) { snapshots.push_back(std::move(snapshot)); } else { LOG(ERROR) << "Encountered bad proto"; diff --git a/tensorflow/compiler/xla/tools/run_hlo_module.cc b/tensorflow/compiler/xla/tools/run_hlo_module.cc new file mode 100644 index 00000000000..39b545af393 --- /dev/null +++ b/tensorflow/compiler/xla/tools/run_hlo_module.cc @@ -0,0 +1,145 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/tools/run_hlo_module.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "absl/types/span.h" +#include "tensorflow/compiler/xla/client/lib/testing.h" +#include "tensorflow/compiler/xla/debug_options_flags.h" +#include "tensorflow/compiler/xla/error_spec.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_runner.h" +#include "tensorflow/compiler/xla/service/hlo_verifier.h" +#include "tensorflow/compiler/xla/service/platform_util.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_utils.h" +#include "tensorflow/compiler/xla/tools/hlo_module_loader.h" +#include "tensorflow/compiler/xla/tools/prepare_reference_module.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow/core/platform/test.h" + +namespace se = ::stream_executor; + +namespace xla { +namespace { + +Literal ExecuteOnPlatform(std::unique_ptr module, + absl::Span args, + se::Platform* platform, bool run_hlo_passes) { + HloRunner runner(platform); + + TF_QCHECK_OK(VerifyHloModule(module.get(), /*layout_sensitive=*/false, + /*allow_mixed_precision=*/true)) + << " (on " << platform->Name() << ")"; + + std::cerr << "Running HLO module on platform " << platform->Name() << "...\n"; + XLA_VLOG_LINES(1, module->ToString()); + const auto start = std::chrono::high_resolution_clock::now(); + auto result_status = runner.Execute(std::move(module), args, run_hlo_passes); + const auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration diff = end - start; + std::cerr << "... compiled and ran in " << diff.count() << "s.\n"; + + TF_QCHECK_OK(result_status.status()) + << "Failed to execute on " << platform->Name() << "\n"; + + return result_status.ConsumeValueOrDie(); +} +} // namespace + +::testing::AssertionResult RunAndCompare( + const std::string& hlo_filename, const std::string& test_platform_name, + const std::string& reference_platform_name, std::minstd_rand0* engine, + const RunHloModuleOptions& options, + std::function + reference_module_modifier_hook) { + se::Platform* test_platform = + xla::PlatformUtil::GetPlatform(test_platform_name).ValueOrDie(); + se::Platform* reference_platform = + reference_platform_name.empty() + ? nullptr + : xla::PlatformUtil::GetPlatform(reference_platform_name) + .ValueOrDie(); + auto config_modifier = [](HloModuleConfig* config) { config->set_seed(42); }; + + std::unique_ptr test_module = + LoadModuleFromFile(hlo_filename, hlo_module_loader_details::Config(), + options.input_format, config_modifier) + .ValueOrDie(); + const HloModuleProto test_module_proto = test_module->ToProto(); + + std::vector args = MakeFakeArguments(test_module.get(), engine, + options.use_large_float_range) + .ConsumeValueOrDie(); + + if (options.print_literals) { + for (int i = 0; i < args.size(); ++i) { + std::cout << "\n** Argument " << i << " **\n" + << args[i].ToString() << "\n"; + } + } + + std::unique_ptr reference_module; + if (reference_platform != nullptr) { + // PrepareReferenceModule needs to know the *test* platform, in order to + // properly match the test platform's numerics. 
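+    // PrepareReferenceModule clones the test module with fast math disabled
+    // and, when no modifier hook is supplied, runs the Despecializer over the
+    // clone so the reference platform executes a deoptimized module.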
+ reference_module = + PrepareReferenceModule(*test_module, test_platform->id(), + config_modifier, reference_module_modifier_hook) + .ConsumeValueOrDie(); + } + + Literal test_result = ExecuteOnPlatform( + std::move(test_module), args, test_platform, options.run_test_hlo_passes); + if (options.print_literals) { + std::cout << "\n** Result on test platform " << test_platform->Name() + << " **\n" + << test_result.ToString() << "\n"; + } + + if (reference_module == nullptr) { + std::cerr << "Skipping reference platform\n"; + return ::testing::AssertionSuccess(); + } + + Literal reference_result = + ExecuteOnPlatform(std::move(reference_module), args, reference_platform, + options.run_reference_hlo_passes); + + if (options.print_literals) { + std::cout << "\n** Result on reference platform " + << reference_platform->Name() << " **\n" + << reference_result.ToString() << "\n"; + } + ErrorSpec error_spec(static_cast(options.abs_error_bound), + static_cast(options.rel_error_bound)); + return LiteralTestUtil::Near(/*expected=*/reference_result, + /*actual=*/test_result, + /*error_spec=*/error_spec, + /*detailed_message=*/true); +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/tools/run_hlo_module.h b/tensorflow/compiler/xla/tools/run_hlo_module.h new file mode 100644 index 00000000000..932cc22f4dd --- /dev/null +++ b/tensorflow/compiler/xla/tools/run_hlo_module.h @@ -0,0 +1,76 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_TOOLS_RUN_HLO_MODULE_H_ +#define TENSORFLOW_COMPILER_XLA_TOOLS_RUN_HLO_MODULE_H_ + +#include +#include +#include + +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/stream_executor/platform.h" + +namespace xla { + +// Command-line options to this tool. See main() in run_hlo_module_main.cc for +// descriptions of these fields. +struct RunHloModuleOptions { + RunHloModuleOptions() + : platform(""), + reference_platform("default"), + print_literals(false), + run_test_hlo_passes(true), + run_reference_hlo_passes(true), + use_large_float_range(true), + // TODO(b/68721786): These tolerances are set to match the values in the + // isolation test. The goal is to lower these to 0.001. 
+ abs_error_bound(0.1), + rel_error_bound(0.1), + input_format("hlo"), + input_module(""), + iterations(1) {} + std::string platform; + std::string reference_platform; + bool print_literals; + bool run_test_hlo_passes; + bool run_reference_hlo_passes; + bool use_large_float_range; + float abs_error_bound; + float rel_error_bound; + std::string input_format; + std::string input_module; + int iterations; +}; + +// Reads a HloModule from 'hlo_filename', runs it on the platform with the name +// 'test_platform_name', and if 'reference_platform_name' is non-empty, it also +// runs it on the platform with the name 'reference_platform_name' and compares +// the results. 'reference_module_modifier_hook' can be used to transform the +// HloModule before it is run on the reference platform. This may be necessary +// to match the numerics of the test platform. +::testing::AssertionResult RunAndCompare( + const std::string& hlo_filename, const std::string& test_platform_name, + const std::string& reference_platform_name, std::minstd_rand0* engine, + const RunHloModuleOptions& options, + std::function + reference_module_modifier_hook = {}); + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_TOOLS_RUN_HLO_MODULE_H_ diff --git a/tensorflow/compiler/xla/tools/run_hlo_module_main.cc b/tensorflow/compiler/xla/tools/run_hlo_module_main.cc new file mode 100644 index 00000000000..7079f413eeb --- /dev/null +++ b/tensorflow/compiler/xla/tools/run_hlo_module_main.cc @@ -0,0 +1,184 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// A tool for reading a HloModule from a HloProto file and execute the module on +// given platform(s). See kUsage for details. + +#include +#include +#include +#include + +#include "absl/strings/str_cat.h" +#include "tensorflow/compiler/xla/debug_options_flags.h" +#include "tensorflow/compiler/xla/tools/run_hlo_module.h" +#include "tensorflow/core/platform/init_main.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/status.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/util/command_line_flags.h" + +namespace { +const char* const kUsage = R"( +This tool lets you read a HloModule from a file and execute the module on given +platform. + +The file can be one of the followings: +1) a binary or text proto file, the proto should be in xla.HloProto type. +2) a hlo text dump, the string should be in HloModule::ToString() format. + +By default, the module is run on a reference platform such as the interpreter +and the reference result is compared against the test result. + +You can also pass in debug option flags for the HloModule. + +Usage: + + bazel run run_hlo_module -- \ + --input_format=[hlo|pb|pbtxt] \ + --platform=[CPU|CUDA|Interpreter] \ + path/to/hlo_module +)"; +const char kInterpreterPlatformName[] = "Interpreter"; + +// Returns the name of the test platform. 
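+// The flag is required; an empty --platform value aborts via QCHECK.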
+std::string GetTestPlatformName(std::string name) { + QCHECK(!name.empty()) << "Must pass --platform flag."; + return name; +} + +// Returns the name of the reference platform +std::string GetReferencePlatformName(std::string reference_platform) { + if (reference_platform == "default") { + return kInterpreterPlatformName; + } + return reference_platform; +} +} // namespace + +int main(int argc, char** argv) { + xla::RunHloModuleOptions opts; + std::vector flag_list = { + tensorflow::Flag( + "platform", &opts.platform, + "The test platform that the HLO module will be executed on " + "(gpu, cpu, etc)."), + tensorflow::Flag( + "reference_platform", &opts.reference_platform, + "The reference platform that HLO module will be " + "executed on. The result produced on the reference platform will " + "be compared against the result produced on the test platform. A " + "value of 'default' will use the TPU_Interpreter as a reference if " + "the test platform is a TPU, and 'interpreter' otherwise. If the " + "flag value is the empty string, then the module will not be run " + "on a reference platform at all."), + tensorflow::Flag("print_literals", &opts.print_literals, + "Print the input and result literals to stdout."), + tensorflow::Flag( + "run_test_hlo_passes", &opts.run_test_hlo_passes, + "Run HLO pass pipeline for the test platform on the HLO module " + "before running the module on the test platform. This should be " + "set to true if the HLO module is unoptimized and set to false if " + "the HLO module already has been optimized."), + tensorflow::Flag( + "run_reference_hlo_passes", &opts.run_reference_hlo_passes, + "Run HLO pass pipeline for the reference platform on the HLO module " + "before running the module on the reference platform. " + "In general, if the given HLO module was optimized for a platform " + "other " + "than the reference this is necessary because some HLO passes are " + "legalization passes which must be run prior to code generation."), + + tensorflow::Flag( + "use_large_float_range", &opts.use_large_float_range, + "Generate floating point values using a large uniform-log " + "distribtion as opposed to a small uniform distribution."), + tensorflow::Flag( + "abs_error_bound", &opts.abs_error_bound, + "The absolute error bound used when comparing the test and " + "reference results."), + tensorflow::Flag( + "rel_error_bound", &opts.rel_error_bound, + "The relative error bound used when comparing the test and " + "reference results."), + tensorflow::Flag("input_format", &opts.input_format, + "The format of the input file. Valid values:\n" + " hlo : HLO textual format\n" + " pb : xla::HloProto in binary proto format\n" + " pbtxt : xla::HloProto in text proto format"), + tensorflow::Flag( + "input_module", &opts.input_module, + "A path to a file containing the HLO module. Can also pass " + "a this as argv[1], but this flag is more explicit."), + tensorflow::Flag( + "iterations", &opts.iterations, + "The number of times to run the module. Each iteration will be run " + "with different input data.")}; + xla::AppendDebugOptionsFlags(&flag_list); + // The usage string includes the message at the top of the file, the + // DebugOptions flags and the flags defined above. 
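+  // It is printed in full (via LOG(QFATAL)) when flag parsing fails.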
+ const std::string kUsageString = absl::StrCat( + kUsage, "\n\n", tensorflow::Flags::Usage(argv[0], flag_list)); + bool parse_ok = tensorflow::Flags::Parse(&argc, argv, flag_list); + tensorflow::port::InitMain(kUsageString.c_str(), &argc, &argv); + if (!parse_ok) { + LOG(QFATAL) << kUsageString; + } + + const std::string test_platform_name = GetTestPlatformName(opts.platform); + const std::string reference_platform_name = + GetReferencePlatformName(opts.reference_platform); + + std::string hlo_filename; + if (!opts.input_module.empty()) { + hlo_filename = opts.input_module; + } else { + QCHECK(argc == 2) << "Must specify a single input file"; + hlo_filename = argv[1]; + } + + std::minstd_rand0 engine; + int failure_count = 0; + const int iteration_count = opts.iterations; + for (int i = 1; i <= iteration_count; ++i) { + if (iteration_count != 1) { + std::cerr << "\n=== Iteration " << i << "\n"; + } + ::testing::AssertionResult matched = + xla::RunAndCompare(hlo_filename, test_platform_name, + reference_platform_name, &engine, opts); + + // The AssertionResult is only meaningful when the reference is + // used. Without a reference, the test just verifies that nothing blew up + // when running the module. + if (!reference_platform_name.empty()) { + if (matched) { + // Success. + std::cerr << "\n** Results on " << test_platform_name << " and " + << reference_platform_name << " are close enough. **\n"; + } else { + failure_count++; + std::cerr << matched.message() << "\n"; + } + } + } + + if (!reference_platform_name.empty()) { + std::cerr << failure_count << "/" << iteration_count + << " runs miscompared.\n"; + } + + return failure_count == 0 ? 0 : -1; +} diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 588420eb1b6..29cfe52a196 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -67,6 +67,7 @@ load( "//tensorflow:tensorflow.bzl", "cc_header_only_library", "if_android", + "if_chromiumos", "if_emscripten", "if_ios", "if_mobile", @@ -105,6 +106,7 @@ load( "//tensorflow/core/platform:build_config.bzl", "tf_additional_all_protos", "tf_additional_core_deps", + "tf_additional_env_hdrs", "tf_additional_lib_deps", "tf_additional_monitoring_hdrs", "tf_additional_test_deps", @@ -398,7 +400,7 @@ filegroup( "//tensorflow/core/platform:file_statistics.h", "//tensorflow/core/platform:file_system.h", "//tensorflow/core/platform:path.h", - ], + ] + tf_additional_env_hdrs(), visibility = ["//visibility:private"], ) @@ -464,7 +466,6 @@ cc_library( "//tensorflow/core/lib/core:legacy_lib_proto_parsing_headers", "//tensorflow/core/lib/strings:legacy_lib_proto_parsing_headers", "//tensorflow/core/platform:init_main.h", - "//tensorflow/core/platform:legacy_proto_hdrs", "//tensorflow/core/platform:logging.h", "//tensorflow/core/platform:macros.h", "//tensorflow/core/platform:platform.h", @@ -1569,7 +1570,6 @@ filegroup( "//tensorflow/core/lib/strings:legacy_lib_strings_all_headers", "//tensorflow/core/lib/strings:legacy_lib_strings_all_srcs", "//tensorflow/core/platform/default/build_config:android_srcs", - "//tensorflow/core/platform:legacy_srcs_no_runtime", "//tensorflow/core/profiler:mobile_srcs", "//tensorflow/core/util/ctc:android_srcs", "//tensorflow/core/util/sparse:mobile_srcs_no_runtime_group", @@ -1604,6 +1604,9 @@ filegroup( "common_runtime/eager/*", "common_runtime/gpu_device_factory.*", ], + ) + if_chromiumos( + ["//tensorflow/core/platform:legacy_srcs_no_runtime_google"], + otherwise = ["//tensorflow/core/platform:legacy_srcs_no_runtime"], ), visibility = 
["//visibility:private"], ) @@ -2166,8 +2169,6 @@ cc_library( "lib/png/**/*", ], ) + [ - "//tensorflow/core/platform:legacy_monitoring_srcs", - "//tensorflow/core/platform:legacy_platform_lib_srcs", "//tensorflow/core/platform:legacy_lib_internal_srcs", ], hdrs = LIB_INTERNAL_PUBLIC_HEADERS, @@ -2255,6 +2256,7 @@ cc_library( "//tensorflow/core/lib/strings:strcat", "//tensorflow/core/lib/strings:stringprintf", "//tensorflow/core/platform:abi", + "//tensorflow/core/platform:base64", "//tensorflow/core/platform:blocking_counter", "//tensorflow/core/platform:coding", "//tensorflow/core/platform:context", @@ -2270,6 +2272,7 @@ cc_library( "//tensorflow/core/platform:hash", "//tensorflow/core/platform:load_library", "//tensorflow/core/platform:logger", + "//tensorflow/core/platform:monitoring", "//tensorflow/core/platform:mutex", "//tensorflow/core/platform:notification", "//tensorflow/core/platform:net", @@ -2283,6 +2286,8 @@ cc_library( "//tensorflow/core/platform:regexp", "//tensorflow/core/platform:scanner", "//tensorflow/core/platform:setround", + "//tensorflow/core/platform:stacktrace", + "//tensorflow/core/platform:stacktrace_handler", "//tensorflow/core/platform:status", "//tensorflow/core/platform:strcat", "//tensorflow/core/platform:stringpiece", @@ -2695,6 +2700,8 @@ tf_cuda_library( "@com_google_absl//absl/time", "//third_party/eigen3", "//tensorflow/core/framework:attr_value_proto_text", + "//tensorflow/core/framework:bfloat16", + "//tensorflow/core/framework:numeric_types", "//tensorflow/core/kernels:bounds_check", "//tensorflow/core/platform/default/build_config:platformlib", "//tensorflow/core/profiler/lib:traceme", @@ -2772,7 +2779,7 @@ tf_cuda_library( deps = [":framework_lite"], ) -# TODO(josh11b): Is this needed, or can we just use ":protos_all"? +# TODO(josh11b): Is this needed, or can we just use ":protos_all_cc"? cc_library( name = "protos_cc", visibility = ["//visibility:public"], diff --git a/tensorflow/core/api_def/base_api/api_def_DebugNumericSummaryV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_DebugNumericSummaryV2.pbtxt index c9097723057..28f0271c7e8 100644 --- a/tensorflow/core/api_def/base_api/api_def_DebugNumericSummaryV2.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_DebugNumericSummaryV2.pbtxt @@ -15,17 +15,67 @@ Tensor debug mode: the mode in which the input tensor is summarized tensorflow/core/protobuf/debug_event.proto for details. Supported values: - 8 (REDUCE_INF_NAN_THREE_SLOTS): Output a float32 tensor of shape + 2 (CURT_HEALTH): Output a float32/64 tensor of shape [2]. The 1st + element is the tensor_id, if provided, and -1 otherwise. The 2nd + element is a bit which is set to 1 if the input tensor has an + infinity or nan value, or zero otherwise. + + 3 (CONCISE_HEALTH): Ouput a float32/64 tensor of shape [5]. The 1st + element is the tensor_id, if provided, and -1 otherwise. The + remaining four slots are the total number of elements, -infs, + +infs, and nans in the input tensor respectively. + + 4 (FULL_HEALTH): Output a float32/64 tensor of shape [11]. The 1st + element is the tensor_id, if provided, and -1 otherwise. The 2nd + element is the device_id, if provided, and -1 otherwise. The 3rd + element holds the datatype value of the input tensor as according + to the enumerated type in tensorflow/core/framework/types.proto. + The remaining elements hold the total number of elements, -infs, + +infs, nans, negative finite numbers, zeros, and positive finite + numbers in the input tensor respectively. 
+ + 5 (SHAPE): Output a float32/64 tensor of shape [10]. The 1st + element is the tensor_id, if provided, and -1 otherwise. The 2nd + element holds the datatype value of the input tensor as according + to the enumerated type in tensorflow/core/framework/types.proto. + The 3rd element holds the rank of the tensor. The 4th element holds + the number of elements within the tensor. Finally the remaining 6 + elements hold the shape of the tensor. If the rank of the tensor + is lower than 6, the shape is right padded with zeros. If the rank + is greater than 6, the head of the shape is truncated. + + 6 (FULL_NUMERICS): Output a float32/64 tensor of shape [22]. The 1st + element is the tensor_id, if provided, and -1 otherwise. The 2nd + element is the device_id, if provided, and -1 otherwise. The 3rd + element holds the datatype value of the input tensor as according + to the enumerated type in tensorflow/core/framework/types.proto. + The 4th element holds the rank of the tensor. The 5th to 11th + elements hold the shape of the tensor. If the rank of the tensor + is lower than 6, the shape is right padded with zeros. If the rank + is greater than 6, the head of the shape is truncated. The 12th to + 18th elements hold the number of elements, -infs, +infs, nans, + denormal floats, negative finite numbers, zeros, and positive + finite numbers in the input tensor respectively. The final four + elements hold the min value, max value, mean, and variance of the + input tensor. + + 8 (REDUCE_INF_NAN_THREE_SLOTS): Output a float32/64 tensor of shape [3]. The 1st element is -inf if any elements of the input tensor is -inf, or zero otherwise. The 2nd element is +inf if any elements of the input tensor is +inf, or zero otherwise. The 3rd element is - nan if any element of the input tensor is nan, or zero otherwise + nan if any element of the input tensor is nan, or zero otherwise. END } attr { name: "tensor_id" description: <>> strings = tf.constant(['Hello','TensorFlow', '\U0001F642']) +>>> tf.strings.length(strings).numpy() # default counts bytes +array([ 5, 10, 4], dtype=int32) +>>> tf.strings.length(strings, unit="UTF8_CHAR").numpy() +array([ 5, 10, 1], dtype=int32) + END } diff --git a/tensorflow/core/api_def/base_api/api_def_TPUReplicatedInput.pbtxt b/tensorflow/core/api_def/base_api/api_def_TPUReplicatedInput.pbtxt index acd52a735cb..d632da17ad9 100644 --- a/tensorflow/core/api_def/base_api/api_def_TPUReplicatedInput.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_TPUReplicatedInput.pbtxt @@ -2,4 +2,17 @@ op { graph_op_name: "TPUReplicatedInput" visibility: HIDDEN summary: "Connects N inputs to an N-way replicated TPU computation." + description: <
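To make the DebugNumericSummaryV2 tensor debug modes described above concrete, a small worked example (values illustrative, assuming a provided tensor_id of 7): for an input tensor [1.0, -inf, nan, 2.0], mode 2 (CURT_HEALTH) would produce [7.0, 1.0], since at least one element is infinite or nan, while mode 3 (CONCISE_HEALTH) would produce [7.0, 4.0, 1.0, 0.0, 1.0]: four elements in total, one -inf, no +inf, and one nan.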