From 8dc98f73247b12062b7983d2235a0407e9aaf195 Mon Sep 17 00:00:00 2001
From: James Bernardi <33673759+j-bernardi@users.noreply.github.com>
Date: Fri, 27 Nov 2020 00:20:33 +0000
Subject: [PATCH 01/60] Making label smoothing documentation more helpful

At present, the label_smoothing documentation is vague for both the CategoricalCrossentropy and BinaryCrossentropy losses.

The CategoricalCrossentropy class's documentation is currently confusing, as smoothing is implemented in a non-intuitive way. It also appears to refer to only 2 classes (as if it were BinaryCrossentropy). The new documentation states its functionality more generally, with an example.
---
 tensorflow/python/keras/losses.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/tensorflow/python/keras/losses.py b/tensorflow/python/keras/losses.py
index d739c16f116..24d77ffd254 100644
--- a/tensorflow/python/keras/losses.py
+++ b/tensorflow/python/keras/losses.py
@@ -640,9 +640,9 @@ class CategoricalCrossentropy(LossFunctionWrapper):
         default, we assume that `y_pred` encodes a probability distribution.
         **Note - Using from_logits=True is more numerically stable.**
       label_smoothing: Float in [0, 1]. When > 0, label values are smoothed,
-        meaning the confidence on label values are relaxed. e.g.
-        `label_smoothing=0.2` means that we will use a value of `0.1` for label
-        `0` and `0.9` for label `1`"
+        meaning the confidence on label values is relaxed. For example, if
+        `0.1`, use `0.1 / num_classes` for non-target labels and
+        `0.9 + 0.1 / num_classes` for target labels.
       reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to
         loss. Default value is `AUTO`. `AUTO` indicates that the reduction
         option will be determined by the usage context. For almost all cases
@@ -1518,7 +1518,9 @@ def categorical_crossentropy(y_true,
     y_pred: Tensor of predicted targets.
     from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
       we assume that `y_pred` encodes a probability distribution.
-    label_smoothing: Float in [0, 1]. If > `0` then smooth the labels.
+    label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
+      example, if `0.1`, use `0.1 / num_classes` for non-target labels
+      and `0.9 + 0.1 / num_classes` for target labels.
 
   Returns:
     Categorical crossentropy loss value.
@@ -1589,7 +1591,9 @@ def binary_crossentropy(y_true, y_pred, from_logits=False, label_smoothing=0):
     y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
     from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
       we assume that `y_pred` encodes a probability distribution.
-    label_smoothing: Float in [0, 1]. If > `0` then smooth the labels.
+    label_smoothing: Float in [0, 1]. If > `0` then smooth the labels by
+      squeezing them towards 0.5. That is, using `1. - 0.5 * label_smoothing`
+      for the target class and `0.5 * label_smoothing` for the non-target class.
 
   Returns:
     Binary crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`.

From 458846324413ee5e351d11812fb5ba303b4d5c2b Mon Sep 17 00:00:00 2001
From: "Wang, Yanzhang" <yanzhang.wang@intel.com>
Date: Wed, 2 Dec 2020 14:53:27 +0800
Subject: [PATCH 02/60] feat: add MatMul+BiasAdd+Add fusion

---
 .../grappler/optimizers/mkl_remapper_test.cc  |  78 ++++++++++
 .../core/grappler/optimizers/remapper.cc      |  52 ++++---
 .../core/kernels/mkl/mkl_fused_ops_test.cc    | 145 +++++++++++-------
 .../core/kernels/mkl/mkl_matmul_op_fused.cc   |  82 ++++++++--
 .../core/kernels/mkl/mkl_matmul_ops_common.h  |  30 ++--
 5 files changed, 290 insertions(+), 97 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/mkl_remapper_test.cc b/tensorflow/core/grappler/optimizers/mkl_remapper_test.cc
index f8574a4e0d3..e9270ff4e54 100644
--- a/tensorflow/core/grappler/optimizers/mkl_remapper_test.cc
+++ b/tensorflow/core/grappler/optimizers/mkl_remapper_test.cc
@@ -446,6 +446,84 @@ TEST_F(MklRemapperTest, FuseBatchNormWithRelu) {
     }
   }
 }
+
+TEST_F(MklRemapperTest, FuseMatMulWithBiasAddAndAdd) {
+  using ::tensorflow::ops::Placeholder;
+
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+
+  auto input_shape = ops::Placeholder::Shape({4, 32});
+  auto input_shape_add = ops::Placeholder::Shape({4, 8});
+  auto filter_shape = ops::Placeholder::Shape({32, 8});
+  auto bias_shape = ops::Placeholder::Shape({8});
+
+  auto input = Placeholder(s.WithOpName("input"), DT_FLOAT, input_shape);
+  auto input_add =
+      Placeholder(s.WithOpName("input_add"), DT_FLOAT, input_shape_add);
+  auto filter = Placeholder(s.WithOpName("filter"), DT_FLOAT, filter_shape);
+  auto bias = Placeholder(s.WithOpName("bias"), DT_FLOAT, bias_shape);
+
+  auto matmul = ops::MatMul(s.WithOpName("matmul"), input, filter);
+  auto bias_add = ops::BiasAdd(s.WithOpName("bias_add"), matmul, bias);
+
+  auto fetch = s.WithOpName("fetch");
+  auto add = ops::Add(s.WithOpName("add"), bias_add, input_add);
+
+  ops::Identity(fetch, add);
+
+  auto input_tensor = GenerateRandomTensor<DT_FLOAT>(
+      TensorShape(input_shape.shape_.dim_sizes()));
+  auto input_add_tensor = GenerateRandomTensor<DT_FLOAT>(
+      TensorShape(input_shape_add.shape_.dim_sizes()));
+  auto filter_tensor = GenerateRandomTensor<DT_FLOAT>(
+      TensorShape(filter_shape.shape_.dim_sizes()));
+  auto bias_tensor = GenerateRandomTensor<DT_FLOAT>(
+      TensorShape(bias_shape.shape_.dim_sizes()));
+
+  GrapplerItem item;
+  item.fetch = {"fetch"};
+  item.feed = {{"input", input_tensor},
+               {"filter", filter_tensor},
+               {"bias", bias_tensor},
+               {"input_add", input_add_tensor}};
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+
+  // Place all nodes on CPU.
+  for (int i = 0; i < item.graph.node_size(); ++i) {
+    item.graph.mutable_node(i)->set_device("/device:CPU:0");
+  }
+
+  Remapper optimizer(RewriterConfig::AGGRESSIVE);
+  GraphDef output;
+  TF_CHECK_OK(optimizer.Optimize(nullptr, item, &output));
+
+  int found = 0;
+  for (const NodeDef& node : output.node()) {
+    auto fetch_node_name = "add";
+    if (node.name() == fetch_node_name) {
+      EXPECT_EQ("_FusedMatMul", node.op());
+      EXPECT_EQ("input", node.input(0));
+      EXPECT_EQ("filter", node.input(1));
+
+      EXPECT_EQ(2, node.attr().at("num_args").i());
+      EXPECT_EQ("bias", node.input(2));
+      EXPECT_EQ("input_add", node.input(3));
+
+      const auto fused_ops = node.attr().at("fused_ops").list().s();
+      EXPECT_EQ(2, fused_ops.size());
+      EXPECT_EQ("BiasAdd", fused_ops[0]);
+      EXPECT_EQ("Add", fused_ops[1]);
+      found++;
+    }
+  }
+  EXPECT_EQ(1, found);
+
+  auto tensors_expected = EvaluateNodes(item.graph, item.fetch, item.feed);
+  auto tensors = EvaluateNodes(output, item.fetch, item.feed);
+  EXPECT_EQ(1, tensors_expected.size());
+  EXPECT_EQ(1, tensors.size());
+  test::ExpectClose(tensors_expected[0], tensors[0], 0, 1e-6);
+}
 #endif  // ENABLE_MKLDNN_V1
 
 }  // namespace grappler
diff --git a/tensorflow/core/grappler/optimizers/remapper.cc b/tensorflow/core/grappler/optimizers/remapper.cc
index b9bd6430991..60d51888215 100644
--- a/tensorflow/core/grappler/optimizers/remapper.cc
+++ b/tensorflow/core/grappler/optimizers/remapper.cc
@@ -1283,28 +1283,34 @@ Status AddFusedContractionNode(RemapperContext* ctx,
   const NodeDef& contraction = graph->node(matched.contraction);
   const NodeDef& bias_add = graph->node(matched.bias_add);
 
-  // MKL version only support fusion for Conv2D
-  DCHECK(IsConv2D(contraction));
+  // MKL version only support fusion for Conv2D and MatMul
+  DCHECK(IsConv2D(contraction) || IsMatMul(contraction));
 
-  NodeDef fused_conv2d;
+  NodeDef contraction_node;
   const NodeDef& add = graph->node(matched.add);
-  fused_conv2d.set_name(add.name());
-  fused_conv2d.set_op(kFusedConv2D);
-  fused_conv2d.set_device(contraction.device());
-  fused_conv2d.add_input(contraction.input(0));  // 0: input
-  fused_conv2d.add_input(contraction.input(1));  // 1: filter
-  fused_conv2d.add_input(bias_add.input(1));     // 2: bias
+  contraction_node.set_name(add.name());
+  contraction_node.set_device(contraction.device());
+  contraction_node.add_input(contraction.input(0));  // 0: input
+  contraction_node.add_input(contraction.input(1));  // 1: filter
+  contraction_node.add_input(bias_add.input(1));     // 2: bias
 
-  // Add OP has two inputs, one is conv+bias pattern matched previously,
-  // the other input to add is fused here.
-  fused_conv2d.add_input(add.input(1 - matched.port_id));
+  // Add OP has two inputs, one is conv+bias/matmul+bias pattern matched
+  // previously, the other input to add is fused here.
+  contraction_node.add_input(add.input(1 - matched.port_id));
 
-  CopyConv2DAttributes(contraction, &fused_conv2d);
-  SetFusedOpAttributes(&fused_conv2d, {"BiasAdd", "Add"}, 2);
+  if (IsConv2D(contraction)) {
+    contraction_node.set_op(kFusedConv2D);
+    CopyConv2DAttributes(contraction, &contraction_node);
+  } else if (IsMatMul(contraction)) {
+    contraction_node.set_op(kFusedMatMul);
+    CopyMatMulAttributes(contraction, &contraction_node);
+  }
+
+  SetFusedOpAttributes(&contraction_node, {"BiasAdd", "Add"}, 2);
 
   utils::Mutation* mutation = ctx->graph_view.GetMutationBuilder();
   Status status;
-  mutation->AddNode(std::move(fused_conv2d), &status);
+  mutation->AddNode(std::move(contraction_node), &status);
   TF_RETURN_IF_ERROR(status);
   TF_RETURN_IF_ERROR(mutation->Apply());
 
@@ -1621,19 +1627,25 @@ Status AddBatchNormNodes(RemapperContext* ctx, const FusedBatchNorm& matched) {
 }
 
 #ifdef INTEL_MKL
-bool IsConv2DWithAdd(const RemapperContext& ctx, int node_index) {
+bool IsConv2DOrMatMul(const NodeDef& node) {
+  return IsConv2D(node) || IsMatMul(node);
+}
+
+bool IsContractionWithAdd(const RemapperContext& ctx, int node_index) {
   const auto* node_view = ctx.graph_view.GetNode(node_index);
   const auto* node_def = node_view->node();
 
   // Candidate for Conv2D + Add or Conv2D + BiasAdd + Add fusion.
+  //               MatMul + Add or MatMul + BiasAdd + Add fusion.
   auto is_supported_add_input = [](const auto* node_view) -> bool {
-    if (IsConv2D(*node_view->node())) return true;
+    // Currently only support Conv2D and MatMul
+    if (IsConv2DOrMatMul(*node_view->node())) return true;
     if (IsBiasAdd(*node_view->node())) {
       if (node_view->NumRegularFanins() < 2) return false;
       const auto& bias_add_fanin_0 = node_view->GetRegularFanin(0);
       const auto& bias_add_fanin_1 = node_view->GetRegularFanin(1);
-      return IsConv2D(*bias_add_fanin_0.node_view()->node()) ||
-             IsConv2D(*bias_add_fanin_1.node_view()->node());
+      return IsConv2DOrMatMul(*bias_add_fanin_0.node_view()->node()) ||
+             IsConv2DOrMatMul(*bias_add_fanin_1.node_view()->node());
     }
     return false;
   };
@@ -1739,7 +1751,7 @@ bool RequiresInferredShapes(const RemapperContext& ctx, int node_index) {
 
 #ifdef INTEL_MKL
   return is_batch_norm_candidate() || is_batch_norm_fusion_candidate() ||
-         IsConv2DWithAdd(ctx, node_index);
+         IsContractionWithAdd(ctx, node_index);
 #else
   return is_relu_biasadd_conv2d_candidate() || is_batch_norm_candidate() ||
          is_batch_norm_fusion_candidate();
diff --git a/tensorflow/core/kernels/mkl/mkl_fused_ops_test.cc b/tensorflow/core/kernels/mkl/mkl_fused_ops_test.cc
index 7bd47e9d014..9bb26535cbf 100644
--- a/tensorflow/core/kernels/mkl/mkl_fused_ops_test.cc
+++ b/tensorflow/core/kernels/mkl/mkl_fused_ops_test.cc
@@ -955,6 +955,71 @@ TEST_F(FilterCacheTest, Conv2DFilterCacheTest) {
 // Testing fusion of MatMul and BiasAdd
 template <typename T>
 class MklFusedMatMulOpTest : public OpsTestBase {
+ private:
+  void RunMklFusedMatMulOp(const Tensor& input, const Tensor& weight,
+                           const std::vector<Tensor>& args,
+                           const std::vector<string>& fused_ops,
+                           Tensor* output) {
+    DataType dtype = DataTypeToEnum<T>::v();
+    const int num_args = args.size();
+    if (!NativeFormatEnabled()) {
+      TF_EXPECT_OK(NodeDefBuilder("MklFusedMatMul", "_MklFusedMatMul")
+                       .Input(FakeInput(dtype))
+                       .Input(FakeInput(dtype))
+                       .Input(FakeInput(num_args, dtype))
+                       .Input(FakeInput(DT_UINT8))
+                       .Input(FakeInput(DT_UINT8))
+                       .Input(FakeInput(num_args, DT_UINT8))
+                       .Attr("T", dtype)
+                       .Attr("transpose_a", false)
+                       .Attr("transpose_b", false)
+                       .Attr("num_args", num_args)
+                       .Attr("fused_ops", fused_ops)
+                       .Attr("epsilon", 0.0001)
+                       .Attr("_kernel", "MklLayoutDependentOp")
+                       .Finalize(node_def()));
+    } else {
+      TF_EXPECT_OK(NodeDefBuilder("MklFusedMatMul", "_MklNativeFusedMatMul")
+                       .Input(FakeInput(dtype))
+                       .Input(FakeInput(dtype))
+                       .Input(FakeInput(num_args, dtype))
+                       .Attr("T", dtype)
+                       .Attr("transpose_a", false)
+                       .Attr("transpose_b", false)
+                       .Attr("num_args", num_args)
+                       .Attr("fused_ops", fused_ops)
+                       .Attr("epsilon", 0.0001)
+                       .Attr("_kernel", "MklNameChangeOp")
+                       .Finalize(node_def()));
+    }
+
+    TF_EXPECT_OK(InitOp());
+
+    AddInputFromArray<T>(input.shape(), input.flat<T>());
+    AddInputFromArray<T>(weight.shape(), weight.flat<T>());
+    for (const Tensor& arg : args)
+      AddInputFromArray<T>(arg.shape(), arg.flat<T>());
+    if (!NativeFormatEnabled()) {
+      // Add MKL meta input for input, filter and bias.
+      AddInputFromArray<uint8>(dummy_shape, dummy_tensor);
+      AddInputFromArray<uint8>(dummy_shape, dummy_tensor);
+      for (const Tensor& arg : args)
+        AddInputFromArray<uint8>(dummy_shape, dummy_tensor);
+    }
+
+    TF_ASSERT_OK(RunOpKernel());
+
+    const Tensor& output_tensor = *GetOutput(0);
+    if (!NativeFormatEnabled()) {
+      const Tensor& output_meta_tensor = *GetOutput(1);
+      CommonTestUtilities<T> test_util;
+      test_util.PerformConversion(dtype, output_tensor, output_meta_tensor,
+                                  output);
+    } else {
+      *output = output_tensor;
+    }
+  }
+
  protected:
   void VerifyFusedMatMul(const int kBatch, const int kInputChannel,
                          const int kOutputChannel,
@@ -1002,70 +1067,24 @@ class MklFusedMatMulOpTest : public OpsTestBase {
             next_op = ops::Tanh(root.WithOpName(last_op), next_op);
           }
 
+          if (std::find(fused_ops.begin(), fused_ops.end(), "Add") !=
+              fused_ops.end()) {
+            last_op = "with_add";
+            next_op = ops::Add(root.WithOpName("with_add"), next_op, input_op);
+          }
+
           CommonTestUtilities<T>::RunAndFetch(root, last_op, output);
         };
 
     const FusedGraphRunner run_fused =
         [this](const Tensor& input, const Tensor& weight, const Tensor& bias,
                const std::vector<string>& fused_ops, Tensor* output) {
-          DataType dtype = DataTypeToEnum<T>::v();
-          const int num_args = 1;
-
-          if (!NativeFormatEnabled()) {
-            TF_EXPECT_OK(NodeDefBuilder("MklFusedMatMul", "_MklFusedMatMul")
-                             .Input(FakeInput(dtype))
-                             .Input(FakeInput(dtype))
-                             .Input(FakeInput(num_args, dtype))
-                             .Input(FakeInput(DT_UINT8))
-                             .Input(FakeInput(DT_UINT8))
-                             .Input(FakeInput(num_args, DT_UINT8))
-                             .Attr("T", dtype)
-                             .Attr("transpose_a", false)
-                             .Attr("transpose_b", false)
-                             .Attr("num_args", num_args)
-                             .Attr("fused_ops", fused_ops)
-                             .Attr("epsilon", 0.0001)
-                             .Attr("_kernel", "MklLayoutDependentOp")
-                             .Finalize(node_def()));
-          } else {
-            TF_EXPECT_OK(
-                NodeDefBuilder("MklFusedMatMul", "_MklNativeFusedMatMul")
-                    .Input(FakeInput(dtype))
-                    .Input(FakeInput(dtype))
-                    .Input(FakeInput(num_args, dtype))
-                    .Attr("T", dtype)
-                    .Attr("transpose_a", false)
-                    .Attr("transpose_b", false)
-                    .Attr("num_args", num_args)
-                    .Attr("fused_ops", fused_ops)
-                    .Attr("epsilon", 0.0001)
-                    .Attr("_kernel", "MklNameChangeOp")
-                    .Finalize(node_def()));
-          }
-
-          TF_EXPECT_OK(InitOp());
-
-          AddInputFromArray<T>(input.shape(), input.flat<T>());
-          AddInputFromArray<T>(weight.shape(), weight.flat<T>());
-          AddInputFromArray<T>(bias.shape(), bias.flat<T>());
-          if (!NativeFormatEnabled()) {
-            // Add MKL meta input for input, filter and bias.
-            AddInputFromArray<uint8>(dummy_shape, dummy_tensor);
-            AddInputFromArray<uint8>(dummy_shape, dummy_tensor);
-            AddInputFromArray<uint8>(dummy_shape, dummy_tensor);
-          }
-
-          TF_ASSERT_OK(RunOpKernel());
-
-          const Tensor& output_tensor = *GetOutput(0);
-          if (!NativeFormatEnabled()) {
-            const Tensor& output_meta_tensor = *GetOutput(1);
-            CommonTestUtilities<T> test_util;
-            test_util.PerformConversion(dtype, output_tensor,
-                                        output_meta_tensor, output);
-          } else {
-            *output = output_tensor;
+          std::vector<Tensor> fused_input = {bias};
+          if (std::find(fused_ops.begin(), fused_ops.end(), "Add") !=
+              fused_ops.end()) {
+            fused_input.push_back(input);
           }
+          RunMklFusedMatMulOp(input, weight, fused_input, fused_ops, output);
         };
 
     CommonTestUtilities<T>::VerifyFusedMatrixClose(kInputChannel, kBatch,
@@ -1120,12 +1139,22 @@ TYPED_TEST_P(MklFusedMatMulOpTest, WithBiasAndTanh) {
                           {"BiasAdd", "Tanh"});
 }
 
+TYPED_TEST_P(MklFusedMatMulOpTest, WithBiasAndAdd) {
+  const int batch = 3;
+  const int input_channel = 4;
+  const int output_channel = 4;
+
+  this->VerifyFusedMatMul(batch, input_channel, output_channel,
+                          {"BiasAdd", "Add"});
+}
+
 REGISTER_TYPED_TEST_SUITE_P(MklFusedMatMulOpTest,  //
                             WithBias,              //
                             WithBiasAndRelu,       //
                             WithBiasAndRelu6,      //
                             WithBiasAndElu,        //
-                            WithBiasAndTanh);
+                            WithBiasAndTanh,       //
+                            WithBiasAndAdd);
 
 using MklFusedMatMulDataTypes = ::testing::Types<float>;
 INSTANTIATE_TYPED_TEST_SUITE_P(Test, MklFusedMatMulOpTest,
diff --git a/tensorflow/core/kernels/mkl/mkl_matmul_op_fused.cc b/tensorflow/core/kernels/mkl/mkl_matmul_op_fused.cc
index 905abbfeef2..246efacb615 100644
--- a/tensorflow/core/kernels/mkl/mkl_matmul_op_fused.cc
+++ b/tensorflow/core/kernels/mkl/mkl_matmul_op_fused.cc
@@ -45,6 +45,7 @@ class MklFusedMatMulOp : public MklDnnMatMulOpBase<T, T> {
         ctx, fused_ops_[0] == "BiasAdd",
         errors::InvalidArgument(
             "The 1st post-argument of MklFusedMatMul must be BiasAdd."));
+    if (fused_ops_.size() > 1 && fused_ops_[1] == "Add") fuse_add_ = true;
     OP_REQUIRES(
         ctx, transpose_a_ == false,
         errors::InvalidArgument("In[0] of MklMatMul can't be transposed."));
@@ -114,7 +115,8 @@ class MklFusedMatMulOp : public MklDnnMatMulOpBase<T, T> {
     //   2. var, keep the original format to avoid reordering.
     MklDnnMatMulFwdParams matmul_params(
         src_dims, weight_dims, bias_dims, dst_dims, src_format,
-        (this->is_weight_const_) ? MEMORY_FORMAT::any : weight_format);
+        (this->is_weight_const_) ? MEMORY_FORMAT::any : weight_format,
+        MEMORY_FORMAT::nc);
 
     // Extend the basic parameters for data types and fusions.
     ExtendMklDnnMatMulFwdParams(ctx, matmul_params);
@@ -126,15 +128,70 @@ class MklFusedMatMulOp : public MklDnnMatMulOpBase<T, T> {
     std::shared_ptr<mkldnn::inner_product_forward::primitive_desc> matmul_pd =
         matmul_prim->GetPrimitiveDesc();
 
-    if (src_mkl_shape.IsMklTensor()) {
-      this->AllocateOutputTensor(ctx, *matmul_pd, dst_dims,
-                                 MKL_TENSOR_FORMAT_NC, &dst_tensor);
+    // The output shape of MatMul is same both for MKL and TF version.
+    // They are all NC format, no matter what's the format of input.
+    // And the shape of AddOp is also the same with output's shape.
+    auto dst_pd = matmul_pd->PRIMITIVE_DESC_DST;
+
+    MklDnnShape output_mkl_shape;
+    output_mkl_shape.SetMklTensor(false);
+
+    TensorShape output_tf_shape({batch, channel});
+
+    if (fuse_add_) {
+      const Tensor& add_tensor = MklGetInput(ctx, kInputIndex_Add);
+      MklDnnShape add_mkl_shape;
+      GetMklShape(ctx, kInputIndex_Add, &add_mkl_shape, native_format);
+
+      // For native format, we need not to set metadata.
+      if (native_format && ctx->forward_input_to_output_with_shape(
+                               kInputIndex_Add, kOutputIndex_Dst,
+                               output_tf_shape, &dst_tensor)) {
+        ;  // Need to do nothing for native format
+      } else if (!native_format && ForwardMklTensorInToOutWithMklShape(
+                                       ctx, kInputIndex_Add, kOutputIndex_Dst,
+                                       &dst_tensor, output_mkl_shape, false)) {
+        ;  // If it's not native format, need to forward and set meta first
+      } else {
+        // If forward is not successful, we should use reorder to copy add
+        // tensor to dst tensor
+        AllocateOutputSetMklShape(ctx, kOutputIndex_Dst, &dst_tensor,
+                                  output_tf_shape, output_mkl_shape,
+                                  native_format);
+        auto output_format_tag =
+            MklTensorFormatToMklDnnDataFormat(MKL_TENSOR_FORMAT_NC);
+        auto add_md =
+            add_mkl_shape.IsMklTensor()
+                ? add_mkl_shape.GetMklLayout()
+                : memory::desc(dst_dims, MklDnnType<T>(), output_format_tag);
+        auto dst_md =
+            memory::desc(dst_dims, MklDnnType<T>(), output_format_tag);
+
+        void* add_buf =
+            static_cast<void*>(const_cast<T*>(add_tensor.flat<T>().data()));
+        void* dst_buf = static_cast<void*>((dst_tensor)->flat<T>().data());
+
+        if (native_format) {
+          // We are simply deep copying the add_tensor to dst_tensor without
+          // changing memory layout, hence using same memory descriptor.
+          add_md = dst_md =
+              memory::desc({add_tensor.NumElements()}, MklDnnType<T>(),
+                           mkldnn::memory::format_tag::x);
+        }
+
+        auto fuse_add_src_ =
+            MEMORY_CONSTRUCTOR(ADD_MD, this->cpu_engine_, add_buf);
+        auto fuse_add_dst_ =
+            MEMORY_CONSTRUCTOR(DST_MD, this->cpu_engine_, dst_buf);
+        auto reorder_desc =
+            REORDER_PD_CONSTRUCTOR(ADD_MD, DST_MD, this->cpu_engine_);
+
+        CreateAndExecuteReorder(reorder_desc, fuse_add_src_, fuse_add_dst_,
+                                this->cpu_engine_, ctx);
+      }
     } else {
-      TensorShape dst_tensor_shape({batch, channel});
-      MklDnnShape dst_mkl_shape;
-      dst_mkl_shape.SetMklTensor(false);
-      AllocateOutputSetMklShape(ctx, 0, &dst_tensor, dst_tensor_shape,
-                                dst_mkl_shape, native_format);
+      AllocateOutputSetMklShape(ctx, 0, &dst_tensor, output_tf_shape,
+                                output_mkl_shape, native_format);
     }
 
     // if there's nothing to compute, just return.
@@ -228,6 +285,8 @@ class MklFusedMatMulOp : public MklDnnMatMulOpBase<T, T> {
         params.post_op_params.push_back({"elu", {1.0, 1.0, 0.0}});
       } else if (post_op == "Tanh") {
         params.post_op_params.push_back({"tanh", {1.0, 0.0, 0.0}});
+      } else if (post_op == "Add") {
+        params.post_op_params.push_back({"sum", {1.0}});
       } else {
         OP_REQUIRES_OK(
             ctx, errors::InvalidArgument(
@@ -237,10 +296,13 @@ class MklFusedMatMulOp : public MklDnnMatMulOpBase<T, T> {
   }
 
  private:
+  bool fuse_add_ = false;
   bool transpose_a_;
   bool transpose_b_;
   std::vector<string> fused_ops_;
-};
+  const int kInputIndex_Add = 3;
+  const int kOutputIndex_Dst = 0;
+};  // namespace tensorflow
 
 // Register mkl kernels for supported operations and types.
 #define REGISTER_FUSEDMATMUL_MKL_SUPPORTED_KERNELS_TYPES(type)                \
diff --git a/tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h b/tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h
index d1e82bf6f02..375047d290f 100644
--- a/tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h
+++ b/tensorflow/core/kernels/mkl/mkl_matmul_ops_common.h
@@ -48,8 +48,9 @@ struct MklDnnMatMulFwdParams {
   memory::dims weight_dims;
   memory::dims bias_dims;
   memory::dims dst_dims;
-  memory::format_tag src_format;
-  memory::format_tag weight_format;
+  MEMORY_FORMAT src_format;
+  MEMORY_FORMAT weight_format;
+  MEMORY_FORMAT dst_format;
   string dtypes = string("");
   struct PostOpParam {
     string name;
@@ -57,17 +58,18 @@ struct MklDnnMatMulFwdParams {
   };
   std::vector<PostOpParam> post_op_params;
 
-  MklDnnMatMulFwdParams(
-      memory::dims src_dims, memory::dims weight_dims, memory::dims bias_dims,
-      memory::dims dst_dims,
-      memory::format_tag src_format = memory::format_tag::any,
-      memory::format_tag weight_format = memory::format_tag::any)
+  MklDnnMatMulFwdParams(memory::dims src_dims, memory::dims weight_dims,
+                        memory::dims bias_dims, memory::dims dst_dims,
+                        MEMORY_FORMAT src_format = MEMORY_FORMAT::any,
+                        MEMORY_FORMAT weight_format = MEMORY_FORMAT::any,
+                        MEMORY_FORMAT dst_format = MEMORY_FORMAT::any)
       : src_dims(src_dims),
         weight_dims(weight_dims),
         bias_dims(bias_dims),
         dst_dims(dst_dims),
         src_format(src_format),
-        weight_format(weight_format) {}
+        weight_format(weight_format),
+        dst_format(dst_format) {}
 };
 
 // With quantization, input, weight, bias, and output can have different types.
@@ -184,7 +186,7 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive {
 
     context_.dst_md.reset(new memory::desc({matmul_fwd_params.dst_dims},
                                            MklDnnType<Toutput>(),
-                                           memory::format_tag::any));
+                                           matmul_fwd_params.dst_format));
 
     context_.bias_md.reset(new memory::desc({matmul_fwd_params.bias_dims},
                                             MklDnnType<Tbias>(),
@@ -236,11 +238,17 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive {
           std::vector<float> scales;
           scales.push_back(post_op_param.param[0]);
           post_ops_attr.set_output_scales(0, scales);
+        } else if (post_op_param.name == "sum") {
+          DCHECK_EQ(post_op_param.param.size(), 1);
+          float op_scale = post_op_param.param[0];
+          post_ops.append_sum(op_scale);
+
         } else {
           DCHECK((post_op_param.name == "relu") ||
                  (post_op_param.name == "relu6") ||
                  (post_op_param.name == "elu") ||
                  (post_op_param.name == "tanh") ||
+                 (post_op_param.name == "sum") ||
                  (post_op_param.name == "output_scale"));
         }
       }
@@ -340,6 +348,10 @@ class MklDnnMatMulFwdPrimitiveFactory : public MklPrimitiveFactory<T> {
         key_creator.AddAsKey(post_op_param.param[0]);
         key_creator.AddAsKey(post_op_param.param[1]);
         key_creator.AddAsKey(post_op_param.param[2]);
+      } else if (post_op_param.name == "sum") {
+        DCHECK_EQ(post_op_param.param.size(), 1);
+        key_creator.AddAsKey(post_op_param.name);
+        key_creator.AddAsKey(post_op_param.param[0]);
       } else if (post_op_param.name == "output_scale") {
         DCHECK_EQ(post_op_param.param.size(), 1);
         key_creator.AddAsKey(post_op_param.name);

From 548da65909d2a04fd20e7973889749f7b47ceaf2 Mon Sep 17 00:00:00 2001
From: ravikyram <51902062+ravikyram@users.noreply.github.com>
Date: Fri, 4 Dec 2020 16:39:42 +0530
Subject: [PATCH 03/60] Update local.py

Removing a redundant line in the documentation for the strides argument.
---
 tensorflow/python/keras/layers/local.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tensorflow/python/keras/layers/local.py b/tensorflow/python/keras/layers/local.py
index b0d287e302e..84fcac3b364 100644
--- a/tensorflow/python/keras/layers/local.py
+++ b/tensorflow/python/keras/layers/local.py
@@ -66,8 +66,6 @@ class LocallyConnected1D(Layer):
           specifying the length of the 1D convolution window.
       strides: An integer or tuple/list of a single integer,
           specifying the stride length of the convolution.
-          Specifying any stride value != 1 is incompatible with specifying
-          any `dilation_rate` value != 1.
       padding: Currently only supports `"valid"` (case-insensitive).
           `"same"` may be supported in the future.
           `"valid"` means no padding.

From d4ddd883957f003ac8bd402c5647ac41c4b0a89c Mon Sep 17 00:00:00 2001
From: "Wang, Yanzhang" <yanzhang.wang@intel.com>
Date: Fri, 11 Dec 2020 11:37:04 +0800
Subject: [PATCH 04/60] fix: modify comment

---
 tensorflow/core/grappler/optimizers/remapper.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/core/grappler/optimizers/remapper.cc b/tensorflow/core/grappler/optimizers/remapper.cc
index 60d51888215..3c76b2b01c2 100644
--- a/tensorflow/core/grappler/optimizers/remapper.cc
+++ b/tensorflow/core/grappler/optimizers/remapper.cc
@@ -1290,8 +1290,8 @@ Status AddFusedContractionNode(RemapperContext* ctx,
   const NodeDef& add = graph->node(matched.add);
   contraction_node.set_name(add.name());
   contraction_node.set_device(contraction.device());
-  contraction_node.add_input(contraction.input(0));  // 0: input
-  contraction_node.add_input(contraction.input(1));  // 1: filter
+  contraction_node.add_input(contraction.input(0));  // 0: input(conv) / a (matmul)
+  contraction_node.add_input(contraction.input(1));  // 1: filter(conv) / b (matmul)
   contraction_node.add_input(bias_add.input(1));     // 2: bias
 
   // Add OP has two inputs, one is conv+bias/matmul+bias pattern matched

From 3b6f823ea4d3a45d3f3d5740529d8b58b8cc434a Mon Sep 17 00:00:00 2001
From: DEKHTIARJonathan <jdekhtiar@nvidia.com>
Date: Thu, 10 Dec 2020 15:00:33 -0800
Subject: [PATCH 05/60] [TFTRT] Rename ParameterizedOpConverterTest classes
 with descriptive names

---
 .../tf2tensorrt/convert/convert_nodes_test.cc | 55 ++++++++++---------
 1 file changed, 29 insertions(+), 26 deletions(-)

diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc
index 1d60ebbd047..ac779bad2e3 100644
--- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc
+++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc
@@ -1893,27 +1893,30 @@ class ParameterizedOpConverterTestBase
 //   how TRT handles the precision inside the TRT network, but should not matter
 //   for the TF -> TRT conversion. Therefore it should be sufficient to test
 //   for FP32.
-class OpConverterTest1 : public ParameterizedOpConverterTestBase {};
+class OpConverter_FP32_Test : 
+  public ParameterizedOpConverterTestBase {};  
+// Base class for tests that need to be tested for both FP32 and FP16.
+class OpConverter_FP32_FP16_Test : 
+  public ParameterizedOpConverterTestBase {};
+// Base class for tests that need to be tested for FP32, FP16, and INT32
+class OpConverter_FP32_FP16_INT32_Test : 
+  public ParameterizedOpConverterTestBase {};
 
-// Instantiate parameter combinations to OpConverterTest1
+// Instantiate parameter combinations to OpConverter_<DT_X...>_Test
 INSTANTIATE_TEST_CASE_P(
-    OpConvTestInstantiation, OpConverterTest1,
+    OpConvTestInstantiation, OpConverter_FP32_Test,
     ::testing::Combine(::testing::ValuesIn(ValidTrtModes),
                        ::testing::Values(DT_FLOAT),
                        ::testing::Values(TrtPrecisionMode::FP32)));
 
-// Base class for tests that need to be tested for both FP32 and FP16.
-class OpConverterTest2 : public ParameterizedOpConverterTestBase {};
 INSTANTIATE_TEST_CASE_P(
-    OpConvTestInstantiation, OpConverterTest2,
+    OpConvTestInstantiation, OpConverter_FP32_FP16_Test,
     ::testing::Combine(::testing::ValuesIn(ValidTrtModes),
                        ::testing::Values(DT_FLOAT, DT_HALF),
                        ::testing::Values(TrtPrecisionMode::FP32)));
 
-// Base class for tests that need to be tested for FP32, FP16, and INT32
-class OpConverterTest3 : public ParameterizedOpConverterTestBase {};
 INSTANTIATE_TEST_CASE_P(
-    OpConvTestInstantiation3, OpConverterTest3,
+    OpConvTestInstantiation, OpConverter_FP32_FP16_INT32_Test,
     ::testing::Combine(::testing::ValuesIn(ValidTrtModes),
                        ::testing::Values(DT_FLOAT, DT_HALF, DT_INT32),
                        ::testing::Values(TrtPrecisionMode::FP32)));
@@ -2078,7 +2081,7 @@ NodeDef CreateFusedBatchNormOp(DataType tf_type, std::string data_format,
       ->def();
 }
 
-TEST_P(OpConverterTest1, ConvertFusedBatchNorm) {
+TEST_P(OpConverter_FP32_Test, ConvertFusedBatchNorm) {
   using OpFunc = std::function<NodeDef(DataType, std::string, bool, float)>;
   std::vector<OpFunc> get_node_def_vec{
       CreateFusedBatchNormOp<ops::FusedBatchNorm>,
@@ -2191,7 +2194,7 @@ TEST_P(OpConverterTest1, ConvertFusedBatchNorm) {
   }
 }
 
-TEST_P(OpConverterTest1, ConvertTranspose) {
+TEST_P(OpConverter_FP32_Test, ConvertTranspose) {
   // Get the NodeDef for Transpose.
   Scope s = Scope::NewRootScope();
   auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
@@ -2349,7 +2352,7 @@ TEST_F(OpConverterTest, ConvertReshape) {
   }
 }
 
-TEST_P(OpConverterTest1, ConvertShape) {
+TEST_P(OpConverter_FP32_Test, ConvertShape) {
   // Get the NodeDef for Shape op.
   Scope s = Scope::NewRootScope();
   auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
@@ -2637,7 +2640,7 @@ TEST_F(OpConverterTest, ConvertBatchMatMul) {
   TestMatMulHelper(this, get_batch_matmul_nodedef, "BatchMatMul");
 }
 
-TEST_P(OpConverterTest2, ConvertBiasAdd) {
+TEST_P(OpConverter_FP32_FP16_Test, ConvertBiasAdd) {
   // Note that kINT32 is not supported by IScaleLayer, so we don't test
   // DT_INT32 type here. DT_FLOAT and DT_HALF are tested.
   // Get the NodeDef for BiasAdd.
@@ -2710,7 +2713,7 @@ NodeDef GetBinaryOpNodeDef(DataType dtype) {
   return op.operation.node()->def();
 }
 
-TEST_P(OpConverterTest2, ConvertBinary) {
+TEST_P(OpConverter_FP32_FP16_Test, ConvertBinary) {
   {
     AttrValue dtype;
     dtype.set_type(tf_type_);
@@ -2974,7 +2977,7 @@ TEST_F(OpConverterTest, ConvertQuantize) {
   }
 }
 
-TEST_P(OpConverterTest2, ConvertSquare) {
+TEST_P(OpConverter_FP32_FP16_Test, ConvertSquare) {
   {
     // Input is weights, should fail.
     Reset();
@@ -3127,7 +3130,7 @@ NodeDef CreateUnaryOp<ops::internal::LeakyRelu>(DataType tf_type) {
       ->def();
 }
 
-TEST_P(OpConverterTest1, ConvertActivation) {
+TEST_P(OpConverter_FP32_Test, ConvertActivation) {
   {
     // Input is weights, should fail.
     Reset();
@@ -3213,7 +3216,7 @@ TEST_P(OpConverterTest1, ConvertActivation) {
   }
 }
 
-TEST_P(OpConverterTest1, ConvertExpandDims) {
+TEST_P(OpConverter_FP32_Test, ConvertExpandDims) {
   // Get the NodeDef for ExpandDims.
   Scope s = Scope::NewRootScope();
   auto input = ops::Placeholder(s.WithOpName("input"), tf_type_);
@@ -3290,7 +3293,7 @@ TEST_P(OpConverterTest1, ConvertExpandDims) {
   }
 }
 
-TEST_P(OpConverterTest1, ConvertSqueeze) {
+TEST_P(OpConverter_FP32_Test, ConvertSqueeze) {
   const bool use_implicit_batch = (trt_mode_ == TrtTestMode::kImplicitBatch);
   // Get the NodeDef for Squeeze.
   auto get_squeeze_nodedef = [](std::vector<int> axes,
@@ -4141,7 +4144,7 @@ TEST_F(OpConverterTest, ConvertSlice) {
   }
 }
 
-TEST_P(OpConverterTest1, ConvertConv2D) {
+TEST_P(OpConverter_FP32_Test, ConvertConv2D) {
   // Get nodedef for Conv2D layer.
   DataType tf_type = tf_type_;
   auto get_conv2d_nodedef =
@@ -4835,7 +4838,7 @@ NodeDef CreatePoolOp(DataType tf_type, std::vector<int> ksize,
       .operation.node()
       ->def();
 }
-TEST_P(OpConverterTest1, ConvertPool) {
+TEST_P(OpConverter_FP32_Test, ConvertPool) {
   // Get nodedef for MaxPool and AvgPool layers (2D or 3D).
   auto get_pool_nodedef =
       [](DataType tf_type, int nDim, std::vector<int> ksize = {},
@@ -5049,7 +5052,7 @@ TEST_F(OpConverterTest, ConvertTopK) {
   }
 }
 
-TEST_P(OpConverterTest3, ConvertGather) {
+TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertGather) {
   // Get the NodeDef for GatherV2.
   Scope s = Scope::NewRootScope();
   auto params = ops::Placeholder(s.WithOpName("params"), tf_type_);
@@ -5302,7 +5305,7 @@ std::vector<float> CalcReduce(string op_name, std::vector<float> input, int m,
   }
   return output;
 }
-TEST_P(OpConverterTest1, ConvertReduce) {
+TEST_P(OpConverter_FP32_Test, ConvertReduce) {
   {
     // Input is weights, should fail.
     Reset();
@@ -5428,7 +5431,7 @@ NodeDef CreateCastOp(DataType tf_type) {
       ->def();
 }
 
-TEST_P(OpConverterTest1, ConvertUnary) {
+TEST_P(OpConverter_FP32_Test, ConvertUnary) {
   {
     // Input is weights, should fail.
     Reset();
@@ -6041,9 +6044,9 @@ NodeDef GetPackNodeDef(DataType dtype, int num_inputs, int axis) {
 }
 
 #if IS_TRT_VERSION_GE(6, 0, 0, 0)
-TEST_P(OpConverterTest3, ConvertPack) {
+TEST_P(OpConverter_FP32_FP16_INT32_Test, ConvertPack) {
 #else
-TEST_P(OpConverterTest2, ConvertPack) {
+TEST_P(OpConverter_FP32_FP16_Test, ConvertPack) {
 #endif
   struct TestParams {
     std::vector<std::vector<int>> input_shapes;
@@ -6725,7 +6728,7 @@ NodeDef GetSquaredDifferenceNodeDef(DataType dtype) {
   return squared_diff.operation.node()->def();
 }
 
-TEST_P(OpConverterTest2, ConvertSquaredDifference) {
+TEST_P(OpConverter_FP32_FP16_Test, ConvertSquaredDifference) {
   {
     // Input is a weight, should fail.
     Reset();

From 5a51f9bbedd3b45a7e02873138a9dcea03912a07 Mon Sep 17 00:00:00 2001
From: Saurabh Saxena <srbs@google.com>
Date: Fri, 11 Dec 2020 15:30:07 -0800
Subject: [PATCH 06/60] Enable gradient_checker_test on TAP.

PiperOrigin-RevId: 347087545
Change-Id: I643fe65ff0eeb59c193e5f1d8faa337ce4f26c9a
---
 tensorflow/c/eager/BUILD | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD
index 0572a2ccc4e..09d5e654107 100644
--- a/tensorflow/c/eager/BUILD
+++ b/tensorflow/c/eager/BUILD
@@ -400,8 +400,9 @@ cc_library(
         "//tensorflow:internal",
     ],
     deps = [
+        ":abstract_tensor_handle",
         ":unified_api_testutil",
-        "//tensorflow/c/eager:abstract_tensor_handle",
+        "//tensorflow/c:tf_tensor_internal",
         "//tensorflow/c/experimental/ops:math_ops",
         "@com_google_absl//absl/types:span",
     ],
@@ -417,14 +418,15 @@ tf_cuda_cc_test(
     linkstatic = tf_kernel_tests_linkstatic(),
     tags = tf_cuda_tests_tags() + [
         "no_cuda_asan",  # b/175330074
-        "notap",  # b/175330074
     ],
     deps = [
+        ":abstract_tensor_handle",
+        ":c_api_experimental",
         ":gradient_checker",
         ":unified_api_testutil",
         "//tensorflow/c:tf_status_helper",
-        "//tensorflow/c/eager:abstract_tensor_handle",
-        "//tensorflow/c/eager:c_api_experimental",
+        "//tensorflow/c:tf_tensor_internal",
+        "//tensorflow/c/experimental/ops",
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "@com_google_absl//absl/types:span",

From 28c7e4d9f28f71abdc113d93f6dc270c1a61ea52 Mon Sep 17 00:00:00 2001
From: Meghna Natraj <mnatraj@google.com>
Date: Fri, 11 Dec 2020 15:56:24 -0800
Subject: [PATCH 07/60] Update usage of `tf.keras.losses.BinaryCrossentropy`

PiperOrigin-RevId: 347092623
Change-Id: I956364fdda51f099f950faf411612b8604d7d194
---
 tensorflow/python/keras/losses.py | 209 +++++++++++++++---------------
 1 file changed, 107 insertions(+), 102 deletions(-)

diff --git a/tensorflow/python/keras/losses.py b/tensorflow/python/keras/losses.py
index d739c16f116..6c6dc44b3d3 100644
--- a/tensorflow/python/keras/losses.py
+++ b/tensorflow/python/keras/losses.py
@@ -12,8 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Built-in loss functions.
-"""
+"""Built-in loss functions."""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -92,8 +91,8 @@ class Loss(object):
         `tf.distribute.Strategy`, outside of built-in training loops such as
         `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
         will raise an error. Please see this custom training [tutorial](
-          https://www.tensorflow.org/tutorials/distribute/custom_training)
-        for more details.
+          https://www.tensorflow.org/tutorials/distribute/custom_training) for
+            more details.
       name: Optional name for the op.
     """
     losses_utils.ReductionV2.validate(reduction)
@@ -122,15 +121,15 @@ class Loss(object):
         sparse loss functions such as sparse categorical crossentropy where
         shape = `[batch_size, d0, .. dN-1]`
       y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`
-      sample_weight: Optional `sample_weight` acts as a
-        coefficient for the loss. If a scalar is provided, then the loss is
-        simply scaled by the given value. If `sample_weight` is a tensor of size
-        `[batch_size]`, then the total loss for each sample of the batch is
-        rescaled by the corresponding element in the `sample_weight` vector. If
-        the shape of `sample_weight` is `[batch_size, d0, .. dN-1]` (or can be
-        broadcasted to this shape), then each loss element of `y_pred` is scaled
+      sample_weight: Optional `sample_weight` acts as a coefficient for the
+        loss. If a scalar is provided, then the loss is simply scaled by the
+        given value. If `sample_weight` is a tensor of size `[batch_size]`, then
+        the total loss for each sample of the batch is rescaled by the
+        corresponding element in the `sample_weight` vector. If the shape of
+        `sample_weight` is `[batch_size, d0, .. dN-1]` (or can be broadcasted to
+        this shape), then each loss element of `y_pred` is scaled
         by the corresponding value of `sample_weight`. (Note on`dN-1`: all loss
-        functions reduce by 1 dimension, usually axis=-1.)
+          functions reduce by 1 dimension, usually axis=-1.)
 
     Returns:
       Weighted loss float `Tensor`. If `reduction` is `NONE`, this has
@@ -230,8 +229,8 @@ class LossFunctionWrapper(Loss):
         `tf.distribute.Strategy`, outside of built-in training loops such as
         `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
         will raise an error. Please see this custom training [tutorial](
-          https://www.tensorflow.org/tutorials/distribute/custom_training)
-        for more details.
+          https://www.tensorflow.org/tutorials/distribute/custom_training) for
+            more details.
       name: (Optional) name for the loss.
       **kwargs: The keyword arguments that are passed on to `fn`.
     """
@@ -250,8 +249,7 @@ class LossFunctionWrapper(Loss):
       Loss values per sample.
     """
     if tensor_util.is_tensor(y_pred) and tensor_util.is_tensor(y_true):
-      y_pred, y_true = losses_utils.squeeze_or_expand_dimensions(
-          y_pred, y_true)
+      y_pred, y_true = losses_utils.squeeze_or_expand_dimensions(y_pred, y_true)
     ag_fn = autograph.tf_convert(self.fn, ag_ctx.control_status_ctx())
     return ag_fn(y_true, y_pred, **self._fn_kwargs)
 
@@ -314,8 +312,8 @@ class MeanSquaredError(LossFunctionWrapper):
         `tf.distribute.Strategy`, outside of built-in training loops such as
         `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
         will raise an error. Please see this custom training [tutorial](
-          https://www.tensorflow.org/tutorials/distribute/custom_training)
-        for more details.
+          https://www.tensorflow.org/tutorials/distribute/custom_training) for
+            more details.
       name: Optional name for the op. Defaults to 'mean_squared_error'.
     """
     super(MeanSquaredError, self).__init__(
@@ -373,8 +371,8 @@ class MeanAbsoluteError(LossFunctionWrapper):
         `tf.distribute.Strategy`, outside of built-in training loops such as
         `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
         will raise an error. Please see this custom training [tutorial](
-          https://www.tensorflow.org/tutorials/distribute/custom_training)
-        for more details.
+          https://www.tensorflow.org/tutorials/distribute/custom_training) for
+            more details.
       name: Optional name for the op. Defaults to 'mean_absolute_error'.
     """
     super(MeanAbsoluteError, self).__init__(
@@ -433,8 +431,8 @@ class MeanAbsolutePercentageError(LossFunctionWrapper):
         `tf.distribute.Strategy`, outside of built-in training loops such as
         `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
         will raise an error. Please see this custom training [tutorial](
-          https://www.tensorflow.org/tutorials/distribute/custom_training)
-        for more details.
+          https://www.tensorflow.org/tutorials/distribute/custom_training) for
+            more details.
       name: Optional name for the op. Defaults to
         'mean_absolute_percentage_error'.
     """
@@ -494,8 +492,8 @@ class MeanSquaredLogarithmicError(LossFunctionWrapper):
         `tf.distribute.Strategy`, outside of built-in training loops such as
         `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
         will raise an error. Please see this custom training [tutorial](
-          https://www.tensorflow.org/tutorials/distribute/custom_training)
-        for more details.
+          https://www.tensorflow.org/tutorials/distribute/custom_training) for
+            more details.
       name: Optional name for the op. Defaults to
         'mean_squared_logarithmic_error'.
     """
@@ -507,44 +505,64 @@ class MeanSquaredLogarithmicError(LossFunctionWrapper):
 class BinaryCrossentropy(LossFunctionWrapper):
   """Computes the cross-entropy loss between true labels and predicted labels.
 
-  Use this cross-entropy loss when there are only two label classes (assumed to
-  be 0 and 1). For each example, there should be a single floating-point value
-  per prediction.
+  Use this cross-entropy loss for binary (0 or 1) classification applications.
+  The loss function requires the following inputs:
 
-  In the snippet below, each of the four examples has only a single
-  floating-pointing value, and both `y_pred` and `y_true` have the shape
-  `[batch_size]`.
+  - `y_true` (true label): This is either 0 or 1.
+  - `y_pred` (predicted value): This is the model's prediction, i.e., a single
+    floating-point value which either represents a
+    [logit](https://en.wikipedia.org/wiki/Logit), (i.e., value in [-inf, inf]
+    when `from_logits=True`) or a probability (i.e., value in [0., 1.] when
+    `from_logits=False`).
 
-  Standalone usage:
+  **Recommended Usage:** (set `from_logits=True`)
 
-  >>> y_true = [[0., 1.], [0., 0.]]
-  >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
-  >>> # Using 'auto'/'sum_over_batch_size' reduction type.
-  >>> bce = tf.keras.losses.BinaryCrossentropy()
-  >>> bce(y_true, y_pred).numpy()
-  0.815
-
-  >>> # Calling with 'sample_weight'.
-  >>> bce(y_true, y_pred, sample_weight=[1, 0]).numpy()
-  0.458
-
-   >>> # Using 'sum' reduction type.
-  >>> bce = tf.keras.losses.BinaryCrossentropy(
-  ...     reduction=tf.keras.losses.Reduction.SUM)
-  >>> bce(y_true, y_pred).numpy()
-  1.630
-
-  >>> # Using 'none' reduction type.
-  >>> bce = tf.keras.losses.BinaryCrossentropy(
-  ...     reduction=tf.keras.losses.Reduction.NONE)
-  >>> bce(y_true, y_pred).numpy()
-  array([0.916 , 0.714], dtype=float32)
-
-  Usage with the `tf.keras` API:
+  With `tf.keras` API:
 
   ```python
-  model.compile(optimizer='sgd', loss=tf.keras.losses.BinaryCrossentropy())
+  model.compile(
+    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
+    ....
+  )
   ```
+
+  As a standalone function:
+
+  >>> # Example 1: (batch_size = 1, number of samples = 4)
+  >>> y_true = [0, 1, 0, 0]
+  >>> y_pred = [-18.6, 0.51, 2.94, -12.8]
+  >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
+  >>> bce(y_true, y_pred).numpy()
+  0.865
+
+  >>> # Example 2: (batch_size = 2, number of samples = 4)
+  >>> y_true = [[0, 1], [0, 0]]
+  >>> y_pred = [[-18.6, 0.51], [2.94, -12.8]]
+  >>> # Using default 'auto'/'sum_over_batch_size' reduction type.
+  >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
+  >>> bce(y_true, y_pred).numpy()
+  0.865
+  >>> # Using 'sample_weight' attribute
+  >>> bce(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
+  0.243
+  >>> # Using 'sum' reduction type.
+  >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True,
+  ...     reduction=tf.keras.losses.Reduction.SUM)
+  >>> bce(y_true, y_pred).numpy()
+  1.730
+  >>> # Using 'none' reduction type.
+  >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True,
+  ...     reduction=tf.keras.losses.Reduction.NONE)
+  >>> bce(y_true, y_pred).numpy()
+  array([0.235, 1.496], dtype=float32)
+
+  **Default Usage:** (set `from_logits=False`)
+
+  >>> # Make the following updates to the above "Recommended Usage" section
+  >>> # 1. Set `from_logits=False`
+  >>> tf.keras.losses.BinaryCrossentropy() # OR ...(from_logits=False)
+  >>> # 2. Update `y_pred` to use probabilities instead of logits
+  >>> y_pred = [0.6, 0.3, 0.2, 0.8] # OR [[0.6, 0.3], [0.2, 0.8]]
   """
 
   def __init__(self,
@@ -570,8 +588,8 @@ class BinaryCrossentropy(LossFunctionWrapper):
         `tf.distribute.Strategy`, outside of built-in training loops such as
         `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
         will raise an error. Please see this custom training [tutorial](
-          https://www.tensorflow.org/tutorials/distribute/custom_training)
-        for more details.
+          https://www.tensorflow.org/tutorials/distribute/custom_training) for
+            more details.
       name: (Optional) Name for the op. Defaults to 'binary_crossentropy'.
     """
     super(BinaryCrossentropy, self).__init__(
@@ -650,8 +668,8 @@ class CategoricalCrossentropy(LossFunctionWrapper):
         `tf.distribute.Strategy`, outside of built-in training loops such as
         `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
         will raise an error. Please see this custom training [tutorial](
-          https://www.tensorflow.org/tutorials/distribute/custom_training)
-        for more details.
+          https://www.tensorflow.org/tutorials/distribute/custom_training) for
+            more details.
       name: Optional name for the op. Defaults to 'categorical_crossentropy'.
     """
     super(CategoricalCrossentropy, self).__init__(
@@ -727,8 +745,8 @@ class SparseCategoricalCrossentropy(LossFunctionWrapper):
         `tf.distribute.Strategy`, outside of built-in training loops such as
         `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
         will raise an error. Please see this custom training [tutorial](
-          https://www.tensorflow.org/tutorials/distribute/custom_training)
-        for more details.
+          https://www.tensorflow.org/tutorials/distribute/custom_training) for
+            more details.
       name: Optional name for the op. Defaults to
         'sparse_categorical_crossentropy'.
     """
@@ -791,8 +809,8 @@ class Hinge(LossFunctionWrapper):
         `tf.distribute.Strategy`, outside of built-in training loops such as
         `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
         will raise an error. Please see this custom training [tutorial](
-          https://www.tensorflow.org/tutorials/distribute/custom_training)
-        for more details.
+          https://www.tensorflow.org/tutorials/distribute/custom_training) for
+            more details.
       name: Optional name for the op. Defaults to 'hinge'.
     """
     super(Hinge, self).__init__(hinge, name=name, reduction=reduction)
@@ -852,8 +870,8 @@ class SquaredHinge(LossFunctionWrapper):
         `tf.distribute.Strategy`, outside of built-in training loops such as
         `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
         will raise an error. Please see this custom training [tutorial](
-          https://www.tensorflow.org/tutorials/distribute/custom_training)
-        for more details.
+          https://www.tensorflow.org/tutorials/distribute/custom_training) for
+            more details.
       name: Optional name for the op. Defaults to 'squared_hinge'.
     """
     super(SquaredHinge, self).__init__(
@@ -912,8 +930,8 @@ class CategoricalHinge(LossFunctionWrapper):
         `tf.distribute.Strategy`, outside of built-in training loops such as
         `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
         will raise an error. Please see this custom training [tutorial](
-          https://www.tensorflow.org/tutorials/distribute/custom_training)
-        for more details.
+          https://www.tensorflow.org/tutorials/distribute/custom_training) for
+            more details.
       name: Optional name for the op. Defaults to 'categorical_hinge'.
     """
     super(CategoricalHinge, self).__init__(
@@ -969,8 +987,8 @@ class Poisson(LossFunctionWrapper):
         `tf.distribute.Strategy`, outside of built-in training loops such as
         `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
         will raise an error. Please see this custom training [tutorial](
-          https://www.tensorflow.org/tutorials/distribute/custom_training)
-        for more details.
+          https://www.tensorflow.org/tutorials/distribute/custom_training) for
+            more details.
       name: Optional name for the op. Defaults to 'poisson'.
     """
     super(Poisson, self).__init__(poisson, name=name, reduction=reduction)
@@ -1026,8 +1044,8 @@ class LogCosh(LossFunctionWrapper):
         `tf.distribute.Strategy`, outside of built-in training loops such as
         `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
         will raise an error. Please see this custom training [tutorial](
-          https://www.tensorflow.org/tutorials/distribute/custom_training)
-        for more details.
+          https://www.tensorflow.org/tutorials/distribute/custom_training) for
+            more details.
       name: Optional name for the op. Defaults to 'log_cosh'.
     """
     super(LogCosh, self).__init__(log_cosh, name=name, reduction=reduction)
@@ -1086,8 +1104,8 @@ class KLDivergence(LossFunctionWrapper):
         `tf.distribute.Strategy`, outside of built-in training loops such as
         `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
         will raise an error. Please see this custom training [tutorial](
-          https://www.tensorflow.org/tutorials/distribute/custom_training)
-        for more details.
+          https://www.tensorflow.org/tutorials/distribute/custom_training) for
+            more details.
       name: Optional name for the op. Defaults to 'kl_divergence'.
     """
     super(KLDivergence, self).__init__(
@@ -1154,20 +1172,17 @@ class Huber(LossFunctionWrapper):
         `tf.distribute.Strategy`, outside of built-in training loops such as
         `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
         will raise an error. Please see this custom training [tutorial](
-          https://www.tensorflow.org/tutorials/distribute/custom_training)
-        for more details.
+          https://www.tensorflow.org/tutorials/distribute/custom_training) for
+            more details.
       name: Optional name for the op. Defaults to 'huber_loss'.
     """
     super(Huber, self).__init__(
         huber, name=name, reduction=reduction, delta=delta)
 
 
-@keras_export('keras.metrics.mean_squared_error',
-              'keras.metrics.mse',
-              'keras.metrics.MSE',
-              'keras.losses.mean_squared_error',
-              'keras.losses.mse',
-              'keras.losses.MSE')
+@keras_export('keras.metrics.mean_squared_error', 'keras.metrics.mse',
+              'keras.metrics.MSE', 'keras.losses.mean_squared_error',
+              'keras.losses.mse', 'keras.losses.MSE')
 @dispatch.add_dispatch_support
 def mean_squared_error(y_true, y_pred):
   """Computes the mean squared error between labels and predictions.
@@ -1198,12 +1213,9 @@ def mean_squared_error(y_true, y_pred):
   return K.mean(math_ops.squared_difference(y_pred, y_true), axis=-1)
 
 
-@keras_export('keras.metrics.mean_absolute_error',
-              'keras.metrics.mae',
-              'keras.metrics.MAE',
-              'keras.losses.mean_absolute_error',
-              'keras.losses.mae',
-              'keras.losses.MAE')
+@keras_export('keras.metrics.mean_absolute_error', 'keras.metrics.mae',
+              'keras.metrics.MAE', 'keras.losses.mean_absolute_error',
+              'keras.losses.mae', 'keras.losses.MAE')
 @dispatch.add_dispatch_support
 def mean_absolute_error(y_true, y_pred):
   """Computes the mean absolute error between labels and predictions.
@@ -1232,11 +1244,9 @@ def mean_absolute_error(y_true, y_pred):
 
 
 @keras_export('keras.metrics.mean_absolute_percentage_error',
-              'keras.metrics.mape',
-              'keras.metrics.MAPE',
+              'keras.metrics.mape', 'keras.metrics.MAPE',
               'keras.losses.mean_absolute_percentage_error',
-              'keras.losses.mape',
-              'keras.losses.MAPE')
+              'keras.losses.mape', 'keras.losses.MAPE')
 @dispatch.add_dispatch_support
 def mean_absolute_percentage_error(y_true, y_pred):
   """Computes the mean absolute percentage error between `y_true` and `y_pred`.
@@ -1269,11 +1279,9 @@ def mean_absolute_percentage_error(y_true, y_pred):
 
 
 @keras_export('keras.metrics.mean_squared_logarithmic_error',
-              'keras.metrics.msle',
-              'keras.metrics.MSLE',
+              'keras.metrics.msle', 'keras.metrics.MSLE',
               'keras.losses.mean_squared_logarithmic_error',
-              'keras.losses.msle',
-              'keras.losses.MSLE')
+              'keras.losses.msle', 'keras.losses.MSLE')
 @dispatch.add_dispatch_support
 def mean_squared_logarithmic_error(y_true, y_pred):
   """Computes the mean squared logarithmic error between `y_true` and `y_pred`.
@@ -1609,12 +1617,9 @@ def binary_crossentropy(y_true, y_pred, from_logits=False, label_smoothing=0):
 
 
 @keras_export('keras.metrics.kl_divergence',
-              'keras.metrics.kullback_leibler_divergence',
-              'keras.metrics.kld',
-              'keras.metrics.KLD',
-              'keras.losses.kl_divergence',
-              'keras.losses.kullback_leibler_divergence',
-              'keras.losses.kld',
+              'keras.metrics.kullback_leibler_divergence', 'keras.metrics.kld',
+              'keras.metrics.KLD', 'keras.losses.kl_divergence',
+              'keras.losses.kullback_leibler_divergence', 'keras.losses.kld',
               'keras.losses.KLD')
 @dispatch.add_dispatch_support
 def kl_divergence(y_true, y_pred):

From 80e40ee5cbe380fd796297f1e9d538a4596c069a Mon Sep 17 00:00:00 2001
From: Rahul Joshi <jurahul@google.com>
Date: Fri, 11 Dec 2020 16:33:14 -0800
Subject: [PATCH 08/60] [XLA:GPU] Support window_reversal in XLA HLO -> LHLO GPU
 -> ConvolutionThunk path. - Change XLA HLO -> LHLO GPU conversion to
 propagate this attribute to LHLO GPU   convolution operations. - Change LHLO
 GPU Convolution -> Convolution thunk to handle window reversal.

PiperOrigin-RevId: 347098895
Change-Id: I0654cb71d2c55cafa93c768cac3e57cc5e72656f
---
 .../hlo_to_lhlo_with_xla/hlo_text_to_lhlo_no_opt.hlotxt  | 4 +++-
 .../mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc         | 9 +++++++++
 .../compiler/xla/service/gpu/ir_emitter_unnested.cc      | 9 ++-------
 3 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/hlo_text_to_lhlo_no_opt.hlotxt b/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/hlo_text_to_lhlo_no_opt.hlotxt
index e4d6ad61b9b..a212a7c7534 100644
--- a/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/hlo_text_to_lhlo_no_opt.hlotxt
+++ b/tensorflow/compiler/mlir/xla/tests/hlo_to_lhlo_with_xla/hlo_text_to_lhlo_no_opt.hlotxt
@@ -242,13 +242,14 @@ HloModule ConvForward
 // CHECK-SAME: padding = dense<0> : tensor<2xi64>
 // CHECK_SAME: result_scale = 1.000000e+00 : f64
 // CHECK_SAME: rhs_dilation = dense<1> : tensor<2xi64>
+// CHECK-SAME: window_reversal = dense<true> : tensor<2xi1>
 // CHECK-SAME: window_strides = dense<1> : tensor<2xi64>
 // CHECK: (memref<4x256x3x3xf32>, memref<256x256x2x2xf32>, memref<4x256x2x2xf32>, memref<65536xui8>)
 ENTRY main {
   %input = f32[4,256,3,3]{3,2,1,0} parameter(0)
   %filter = f32[256,256,2,2]{3,2,1,0} parameter(1)
   ROOT %custom-call.1 = (f32[4,256,2,2]{3,2,1,0}, u8[65536]{0}) custom-call(f32[4,256,3,3]{3,2,1,0} %input, f32[256,256,2,2]{3,2,1,0} %filter),
-                        window={size=2x2}, dim_labels=bf01_oi01->bf01,
+                        window={size=2x2 rhs_reversal=1x1}, dim_labels=bf01_oi01->bf01,
                         custom_call_target="__cudnn$convForward",
                         backend_config="{\"algorithm\":\"2\",\"tensor_ops_enabled\":false,\"conv_result_scale\":1,\"activation_mode\":\"0\",\"side_input_scale\":0}"
 }
@@ -276,6 +277,7 @@ ENTRY main {
 // CHECK-SAME: precision_config = ["DEFAULT", "DEFAULT", "DEFAULT"]
 // CHECK-SAME: result_scale = 1.000000e+00 : f64
 // CHECK-SAME: rhs_dilation = dense<1> : tensor<2xi64>
+// CHECK-SAME: window_reversal = dense<false> : tensor<2xi1>
 // CHECK-SAME: window_strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: (memref<1x17x9x9xf16, #map{{.*}}>, memref<3x3x17x32xf16, #map{{.*}}>, memref<32xf16>, memref<1x32x9x9xf16, #{{.*}}>, memref<0xui8>) -> ()
 
diff --git a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc
index baa7ab0db3c..d19a8c76aff 100644
--- a/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc
+++ b/tensorflow/compiler/mlir/xla/transforms/mhlo_to_lhlo_with_xla.cc
@@ -681,6 +681,15 @@ StatusOr<Operation*> LhloDialectEmitter::EmitDnnConvolution(
         GetWindowElements(window, [](const ::xla::WindowDimension& dim) {
           return static_cast<int64_t>(dim.window_dilation());
         }));
+    // Setup window reversal.
+    auto window_reversal = llvm::to_vector<4>(llvm::map_range(
+        window.dimensions(), [](const ::xla::WindowDimension& dim) {
+          return dim.window_reversal();
+        }));
+    auto type = RankedTensorType::get(op.window_strides()->getType().getShape(),
+                                      builder_.getIntegerType(/*width=*/1));
+    op.window_reversalAttr(DenseElementsAttr::get(type, window_reversal));
+
     op.dimension_numbersAttr(xla::ConvertConvDimensionNumbers(
         custom_call->convolution_dimension_numbers(), &builder_));
     op.feature_group_countAttr(
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index b79100e6d57..5cba8e9e5d2 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -1052,13 +1052,6 @@ Status IrEmitterUnnested::HandleCustomCall(HloInstruction* custom_call) {
                 mlir::lmhlo_gpu::ConvForwardFusedSideInputOp,
                 mlir::lmhlo_gpu::ConvBackwardFilterOp,
                 mlir::lmhlo_gpu::ConvBackwardInputOp>(input.op)) {
-    // TODO(jurahul): Window reveral is not yet supported in HLO. Fallback to
-    // HLO based thunk for that case.
-    if (absl::c_any_of(
-            custom_call->window().dimensions(),
-            [](const WindowDimension& dim) { return dim.window_reversal(); })) {
-      return ThunkEmitter(this).HandleCustomCall(custom_call);
-    }
     return EmitConvolutionThunkFromMlir(input);
   }
 
@@ -1109,6 +1102,7 @@ Status IrEmitterUnnested::EmitConvolutionThunkFromMlir(MlirEmitterInput input) {
     mlir::DenseIntElementsAttr padding = op.padding().getValue();
     mlir::DenseIntElementsAttr lhs_dilation = op.lhs_dilation().getValue();
     mlir::DenseIntElementsAttr rhs_dilation = op.rhs_dilation().getValue();
+    mlir::DenseElementsAttr window_reversal = op.window_reversal().getValue();
     for (auto index : llvm::seq<int>(0, window_strides.getNumElements())) {
       WindowDimension* dim = descriptor.window.add_dimensions();
       // Window size for a convolution is the same as the kernel size.
@@ -1122,6 +1116,7 @@ Status IrEmitterUnnested::EmitConvolutionThunkFromMlir(MlirEmitterInput input) {
       dim->set_padding_high(padding.getValue<int64>(index));
       dim->set_base_dilation(lhs_dilation.getValue<int64>(index));
       dim->set_window_dilation(rhs_dilation.getValue<int64>(index));
+      dim->set_window_reversal(window_reversal.getValue<bool>(index));
     }
     descriptor.feature_group_count = op.feature_group_count();
     descriptor.backend_config.set_algorithm(

From 32106dbb0dacae97c6052ba684db89eb4564906b Mon Sep 17 00:00:00 2001
From: Karim Nosir <karimnosseir@google.com>
Date: Fri, 11 Dec 2020 16:55:51 -0800
Subject: [PATCH 09/60] Fix DepthToSpace unit-test on hexagon to use correct
 version condition.

PiperOrigin-RevId: 347102122
Change-Id: If04ee1a798a3d7a739f7425f48654792679ff7ff
---
 tensorflow/lite/delegates/hexagon/utils.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/lite/delegates/hexagon/utils.cc b/tensorflow/lite/delegates/hexagon/utils.cc
index 397400c81f0..1a179a3462e 100644
--- a/tensorflow/lite/delegates/hexagon/utils.cc
+++ b/tensorflow/lite/delegates/hexagon/utils.cc
@@ -94,6 +94,7 @@ bool CheckOpVersion(const TfLiteRegistration* registration) {
     case kTfLiteBuiltinSlice:
     case kTfLiteBuiltinSoftmax:
     case kTfLiteBuiltinSpaceToDepth:
+    case kTfLiteBuiltinDepthToSpace:
     case kTfLiteBuiltinSplit:
     case kTfLiteBuiltinStridedSlice:
     case kTfLiteBuiltinSub:

From 0d882ea469dcffc62f32b9e981e4602fb3b3c43a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 11 Dec 2020 17:01:08 -0800
Subject: [PATCH 10/60] Roll back "Allow state tensors to use device memories in
 NNAPI delegate."

PiperOrigin-RevId: 347102826
Change-Id: I9059612fd8019284416a814d240452b1f8f77b86
---
 .../lite/delegates/nnapi/nnapi_delegate.cc    | 449 +++++-------------
 .../lite/delegates/nnapi/nnapi_delegate.h     |  57 ---
 .../delegates/nnapi/nnapi_delegate_kernel.h   |  29 --
 .../delegates/nnapi/nnapi_delegate_test.cc    | 143 ++----
 4 files changed, 149 insertions(+), 529 deletions(-)

diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
index a73b44bfcbd..89846501789 100644
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc
@@ -449,78 +449,6 @@ ANeuralNetworksOperandType ConvertTensorTypeToNNType(
   return nn_operand_type;
 }
 
-// Copy the CPU buffer of the input tensor to a shared memory address. Will
-// apply data type conversion if needed. The returned tensor_size is the size
-// after the potential data type conversion.
-TfLiteStatus CopyOrConvertInputData(TfLiteContext* context,
-                                    TfLiteType ann_type_equivalent,
-                                    bool use_int8_asymm_signed,
-                                    TfLiteTensor* tensor, uint8_t* dst,
-                                    int* tensor_size) {
-  if (ann_type_equivalent != kTfLiteNoType) {
-    const auto num_elements = NumElements(tensor);
-    if (tensor->type == kTfLiteUInt8 && ann_type_equivalent == kTfLiteInt32) {
-      for (int i = 0; i < num_elements; ++i) {
-        reinterpret_cast<int32_t*>(dst)[i] =
-            static_cast<const int32_t>(tensor->data.uint8[i]);
-      }
-    } else if (tensor->type == kTfLiteInt8 &&
-               ann_type_equivalent == kTfLiteUInt8) {
-      // Explicitly convert int8 values to uint8 values.
-      for (int i = 0; i < num_elements; ++i) {
-        dst[i] = static_cast<const uint8_t>(
-            static_cast<int32_t>(tensor->data.int8[i]) + 128);
-      }
-    } else if (tensor->type == kTfLiteInt8 &&
-               ann_type_equivalent == kTfLiteInt32) {
-      if (use_int8_asymm_signed) {
-        for (int i = 0; i < num_elements; ++i) {
-          reinterpret_cast<int32_t*>(dst)[i] =
-              static_cast<const int32_t>(tensor->data.int8[i]);
-        }
-      } else {
-        for (int i = 0; i < num_elements; ++i) {
-          reinterpret_cast<int32_t*>(dst)[i] =
-              static_cast<const int32_t>(tensor->data.int8[i]) + 128;
-        }
-      }
-    } else {
-      TF_LITE_KERNEL_LOG(
-          context,
-          "NN API Delegate: unsupported tensor types conversion: "
-          "from type code %d to type code %d.\n",
-          tensor->type, ann_type_equivalent);
-      return kTfLiteError;
-    }
-    size_t type_size;
-    TF_LITE_ENSURE_OK(context,
-                      GetSizeOfType(context, ann_type_equivalent, &type_size));
-    *tensor_size = NumElements(tensor) * type_size;
-  } else {
-    // copy data to pre-allocated shared memory.
-    memcpy(dst, tensor->data.raw, tensor->bytes);
-    *tensor_size = tensor->bytes;
-  }
-  return kTfLiteOk;
-}
-
-// Copy into the CPU buffer of the output tensor from a shared memory address.
-// Will apply data type conversion if needed.
-TfLiteStatus CopyOrConvertOutputData(TfLiteType ann_type_equivalent,
-                                     const uint8_t* src, TfLiteTensor* tensor) {
-  if (tensor->type == kTfLiteInt8 && ann_type_equivalent == kTfLiteUInt8) {
-    // Explicitly convert uint8 values to int8 values.
-    int8_t* output_ptr = tensor->data.int8;
-    const auto num_elements = NumElements(tensor);
-    for (int i = 0; i < num_elements; ++i) {
-      output_ptr[i] = static_cast<int8_t>(static_cast<int32_t>(src[i]) - 128);
-    }
-  } else {
-    memcpy(tensor->data.raw, src, tensor->bytes);
-  }
-  return kTfLiteOk;
-}
-
 constexpr size_t kDefaultByteAlignmentForNNAPI = 16;
 
 static size_t getNumPaddingBytes(size_t byte_size) {
@@ -3714,7 +3642,6 @@ TfLiteStatus NNAPIDelegateKernel::Prepare(TfLiteContext* context,
     return kTfLiteOk;
   }
 
-  const auto& delegate_data = StatefulNnApiDelegate::GetData(node->delegate);
   ANeuralNetworksCompilation* compilation = nullptr;
   if (!nnapi_devices_.empty()) {
     // Compile for the selected accelerator.
@@ -3782,67 +3709,6 @@ TfLiteStatus NNAPIDelegateKernel::Prepare(TfLiteContext* context,
   }
   RETURN_TFLITE_ERROR_IF_NN_ERROR(context, finish_result,
                                   "completing NNAPI compilation", nnapi_errno);
-
-  const bool use_device_memory_for_state_tensors =
-      nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13 &&
-      delegate_data.use_device_memory_for_state_tensors &&
-      delegate_data.single_partition_delegated &&
-      // State tensors with dynamic shapes are currently not supported.
-      std::all_of(model_state_tfl_inputs_.begin(),
-                  model_state_tfl_inputs_.end(), [&context](int tfl_index) {
-                    TfLiteTensor* tensor = &context->tensors[tfl_index];
-                    return !IsDynamicTensor(tensor);
-                  });
-  if (use_device_memory_for_state_tensors) {
-    for (int tfl_index : model_state_tfl_inputs_) {
-      auto& info = nn_state_tensor_info_map_.at(tfl_index);
-
-      // prepare device memory descriptor
-      ANeuralNetworksMemoryDesc* desc = nullptr;
-      RETURN_TFLITE_ERROR_IF_NN_ERROR(
-          context, nnapi_->ANeuralNetworksMemoryDesc_create(&desc),
-          "creating device memory descriptor", nnapi_errno);
-      RETURN_TFLITE_ERROR_IF_NN_ERROR(
-          context,
-          nnapi_->ANeuralNetworksMemoryDesc_addInputRole(
-              desc, compilation, info.nn_input_index, 1.0f),
-          "adding input role to the device memory descriptor", nnapi_errno);
-      RETURN_TFLITE_ERROR_IF_NN_ERROR(
-          context,
-          nnapi_->ANeuralNetworksMemoryDesc_addOutputRole(
-              desc, compilation, info.nn_output_index, 1.0f),
-          "adding output role to the device memory descriptor", nnapi_errno);
-      RETURN_TFLITE_ERROR_IF_NN_ERROR(
-          context, nnapi_->ANeuralNetworksMemoryDesc_finish(desc),
-          "finishing device memory descriptor", nnapi_errno);
-
-      // allocate two device memories for each state tensor
-      ANeuralNetworksMemory* state_input_memory = nullptr;
-      RETURN_TFLITE_ERROR_IF_NN_ERROR(
-          context,
-          nnapi_->ANeuralNetworksMemory_createFromDesc(desc,
-                                                       &state_input_memory),
-          "creating input device memory from the descriptor", nnapi_errno);
-      info.nn_input_memory_handle.reset(state_input_memory);
-
-      ANeuralNetworksMemory* state_output_memory = nullptr;
-      RETURN_TFLITE_ERROR_IF_NN_ERROR(
-          context,
-          nnapi_->ANeuralNetworksMemory_createFromDesc(desc,
-                                                       &state_output_memory),
-          "creating output device memory from the descriptor", nnapi_errno);
-      info.nn_output_memory_handle.reset(state_output_memory);
-      nnapi_->ANeuralNetworksMemoryDesc_free(desc);
-
-      // we need a temporary buffer to sync states to raw pointers
-      TfLiteTensor* tensor = &context->tensors[tfl_index];
-      if (tensor->buffer_handle == kTfLiteNullBufferHandle) {
-        info.nn_temp_buffer.reset(
-            new NNMemory(nnapi_, "temp state tensor", info.tensor_size));
-      }
-    }
-  }
-
   nn_compilation_.reset(compilation);
 
   return kTfLiteOk;
@@ -3904,7 +3770,6 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
   // Set compilation timeout if applicable.
   const auto delegate_options =
       StatefulNnApiDelegate::GetOptions(node->delegate);
-  const auto& delegate_data = StatefulNnApiDelegate::GetData(node->delegate);
   if (nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13) {
     if (delegate_options.max_execution_timeout_duration_ns > 0) {
       RETURN_TFLITE_ERROR_IF_NN_ERROR(
@@ -3970,24 +3835,14 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
     }
   }
 
-  const bool use_device_memory_for_state_tensors =
-      nnapi_->android_sdk_version >= kMinSdkVersionForNNAPI13 &&
-      delegate_data.use_device_memory_for_state_tensors &&
-      // TODO(b/174612931): Even if the model is not fully supported, we can
-      // still use device memories for state tensors if they are only used in
-      // one single partition.
-      delegate_data.single_partition_delegated &&
-      std::all_of(model_state_tfl_inputs_.begin(),
-                  model_state_tfl_inputs_.end(), [&context](int tfl_index) {
-                    TfLiteTensor* tensor = &context->tensors[tfl_index];
-                    return !IsDynamicTensor(tensor);
-                  });
-
   // Set the input tensor buffers. Note: we access tflite tensors using
   // absolute indices but NN api indices inputs by relative indices.
   int relative_input_index = 0;
 
-  size_t input_offset_accumulator = 0;
+  const bool use_int8_asymm_signed =
+      target_sdk_version_ >= kMinSdkVersionForNNAPI13;
+
+  size_t input_offset = 0;
   for (auto absolute_input_index : TfLiteIntArrayView(node->inputs)) {
     if (absolute_input_index == kTfLiteOptionalTensor) {
       continue;
@@ -4005,58 +3860,90 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
       input_nn_operand_type_ptr = &input_nn_operand_type;
     }
     if (tensor->allocation_type != kTfLiteMmapRo) {
-      ANeuralNetworksMemory* input_memory_handle = nullptr;
-      uint32_t input_offset = 0;
-      uint32_t input_length = 0;
-      const bool is_state_tensor =
-          nn_state_tensor_info_map_.count(absolute_input_index) > 0;
-      if (is_state_tensor && use_device_memory_for_state_tensors &&
-          // If the client requests to sync states to device, we will use the
-          // shared memory directly as input instead of explicitly copying into
-          // the device memory.
-          !delegate_data.sync_states_to_device) {
-        const auto& state_tensor_info =
-            nn_state_tensor_info_map_.at(absolute_input_index);
-        input_memory_handle = state_tensor_info.nn_input_memory_handle.get();
-        input_offset = 0;
-        input_length = 0;
-      } else if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
-                 tensor->buffer_handle < tensor_memory_map_->size()) {
-        input_memory_handle =
-            tensor_memory_map_->at(tensor->buffer_handle).memory;
-        input_offset = 0;
-        input_length = tensor->bytes;
-      } else {
-        int tensor_size = 0;
-        // copy or convert tensor data to pre-allocated shared memory.
-        const bool use_int8_asymm_signed =
-            target_sdk_version_ >= kMinSdkVersionForNNAPI13;
-        TF_LITE_ENSURE_OK(
+      if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
+          tensor->buffer_handle < tensor_memory_map_->size()) {
+        RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
             context,
-            CopyOrConvertInputData(
-                context, ann_type_equivalent, use_int8_asymm_signed, tensor,
-                nn_input_memory_->get_data_ptr() + input_offset_accumulator,
-                &tensor_size));
-        input_memory_handle = nn_input_memory_->get_handle();
-        input_offset = input_offset_accumulator;
-        input_length = tensor_size;
-        input_offset_accumulator += tensor_size;
-        input_offset_accumulator += getNumPaddingBytes(tensor_size);
+            nnapi_->ANeuralNetworksExecution_setInputFromMemory(
+                execution, relative_input_index, input_nn_operand_type_ptr,
+                tensor_memory_map_->at(tensor->buffer_handle).memory, 0,
+                tensor->bytes),
+            "associating NNAPI execution input with a memory object", tensor,
+            nnapi_errno);
+        relative_input_index++;
+        continue;
       }
-      RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
-          context,
-          nnapi_->ANeuralNetworksExecution_setInputFromMemory(
-              execution, relative_input_index, input_nn_operand_type_ptr,
-              input_memory_handle, input_offset, input_length),
-          "associating NNAPI execution input with a memory object", tensor,
-          nnapi_errno);
+      int tensor_size = 0;
+      if (ann_type_equivalent != kTfLiteNoType) {
+        const auto num_elements = NumElements(tensor);
+        uint8_t* input_ptr = nn_input_memory_->get_data_ptr() + input_offset;
+        if (tensor->type == kTfLiteUInt8 &&
+            ann_type_equivalent == kTfLiteInt32) {
+          for (int i = 0; i < num_elements; ++i) {
+            reinterpret_cast<int32_t*>(input_ptr)[i] =
+                static_cast<const int32_t>(tensor->data.uint8[i]);
+          }
+        } else if (tensor->type == kTfLiteInt8 &&
+                   ann_type_equivalent == kTfLiteUInt8) {
+          // Explicitly convert int8 values to uint8 values.
+          for (int i = 0; i < num_elements; ++i) {
+            input_ptr[i] = static_cast<const uint8_t>(
+                static_cast<int32_t>(tensor->data.int8[i]) + 128);
+          }
+        } else if (tensor->type == kTfLiteInt8 &&
+                   ann_type_equivalent == kTfLiteInt32) {
+          if (use_int8_asymm_signed) {
+            for (int i = 0; i < num_elements; ++i) {
+              reinterpret_cast<int32_t*>(input_ptr)[i] =
+                  static_cast<const int32_t>(tensor->data.int8[i]);
+            }
+          } else {
+            for (int i = 0; i < num_elements; ++i) {
+              reinterpret_cast<int32_t*>(input_ptr)[i] =
+                  static_cast<const int32_t>(tensor->data.int8[i]) + 128;
+            }
+          }
+        } else {
+          context->ReportError(
+              context,
+              "NN API Delegate: unsupported tensor types conversion: "
+              "from type code %d to type code %d.\n",
+              tensor->type, ann_type_equivalent);
+          return kTfLiteError;
+        }
+        size_t type_size;
+        TF_LITE_ENSURE_OK(
+            context, GetSizeOfType(context, ann_type_equivalent, &type_size));
+        tensor_size = NumElements(tensor) * type_size;
+        RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
+            context,
+            nnapi_->ANeuralNetworksExecution_setInputFromMemory(
+                execution, relative_input_index, input_nn_operand_type_ptr,
+                nn_input_memory_->get_handle(), input_offset, tensor_size),
+            "associating NNAPI execution input with a memory object", tensor,
+            nnapi_errno);
+      } else {
+        // copy data to pre-allocated shared memory.
+        memcpy(nn_input_memory_->get_data_ptr() + input_offset,
+               tensor->data.raw, tensor->bytes);
+        RETURN_TFLITE_ERROR_IF_NN_ERROR_FOR_TENSOR(
+            context,
+            nnapi_->ANeuralNetworksExecution_setInputFromMemory(
+                execution, relative_input_index, input_nn_operand_type_ptr,
+                nn_input_memory_->get_handle(), input_offset, tensor->bytes),
+            "associating NNAPI execution input with a memory object", tensor,
+            nnapi_errno);
+        tensor_size = tensor->bytes;
+      }
+      input_offset += tensor_size;
+      input_offset += getNumPaddingBytes(tensor_size);
       relative_input_index++;
     }
   }
 
   // Set the output tensor buffers.
   int relative_output_index = 0;
-  size_t output_offset_accumulator = 0;
+  size_t output_offset = 0;
   for (auto output_index : TfLiteIntArrayView(node->outputs)) {
     // If the NNAPI implementation doesn't have some of the outputs
     // they are left unmapped and we should not try to read their value here
@@ -4090,12 +3977,11 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
           context,
           nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
               execution, relative_output_index, output_nn_operand_type_ptr,
-              nn_output_memory_->get_handle(), output_offset_accumulator,
-              tensor->bytes),
+              nn_output_memory_->get_handle(), output_offset, tensor->bytes),
           "associating NNAPI execution output to a memory object", tensor,
           nnapi_errno);
-      output_offset_accumulator += tensor->bytes;
-      output_offset_accumulator += getNumPaddingBytes(tensor->bytes);
+      output_offset += tensor->bytes;
+      output_offset += getNumPaddingBytes(tensor->bytes);
     }
     relative_output_index++;
   }
@@ -4104,27 +3990,16 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
   // current invocation.
   for (size_t i = 0; i < model_state_tfl_inputs_.size(); i++) {
     int state_tensor_idx = model_state_tfl_inputs_[i];
-    if (use_device_memory_for_state_tensors) {
-      auto* device_memory = nn_state_tensor_info_map_.at(state_tensor_idx)
-                                .nn_output_memory_handle.get();
-      RETURN_TFLITE_ERROR_IF_NN_ERROR(
-          context,
-          nnapi_->ANeuralNetworksExecution_setOutputFromMemory(
-              execution, relative_output_index, nullptr, device_memory, 0, 0),
-          "associating NNAPI execution output with a device memory object",
-          nnapi_errno);
-    } else {
-      TfLiteTensor* tensor = &context->tensors[state_tensor_idx];
-      // Here we are using a deep copy for state_in tensors so that we are not
-      // reading and writing into the same buffer during a invocation.
-      // TODO(b/110369471): using double shared buffer to minimize the copies.
-      RETURN_TFLITE_ERROR_IF_NN_ERROR(
-          context,
-          nnapi_->ANeuralNetworksExecution_setOutput(
-              execution, relative_output_index, nullptr, tensor->data.raw,
-              tensor->bytes),
-          "associating NNAPI execution output to a buffer", nnapi_errno);
-    }
+    TfLiteTensor* tensor = &context->tensors[state_tensor_idx];
+    // Here we are using a deep copy for state_in tensors so that we are not
+    // reading and writing into the same buffer during a invocation.
+    // TODO(b/110369471): using double shared buffer to minimize the copies.
+    RETURN_TFLITE_ERROR_IF_NN_ERROR(
+        context,
+        nnapi_->ANeuralNetworksExecution_setOutput(
+            execution, relative_output_index, nullptr, tensor->data.raw,
+            tensor->bytes),
+        "associating NNAPI execution output to a buffer", nnapi_errno);
     relative_output_index++;
   }
   // Invoke ANN in blocking fashion.
@@ -4147,70 +4022,39 @@ TfLiteStatus NNAPIDelegateKernel::Invoke(TfLiteContext* context,
   }
 
   // copy results from shared memory to the destination.
-  output_offset_accumulator = 0;
+  output_offset = 0;
   for (auto output_index : TfLiteIntArrayView(node->outputs)) {
     TfLiteTensor* tensor = &context->tensors[output_index];
     if (tensor->buffer_handle != kTfLiteNullBufferHandle) {
       continue;
     }
-    const TfLiteType ann_type_equivalent =
+    TfLiteType ann_type_equivalent =
         operand_mapping_.lite_index_to_ann_type_conversion(output_index);
-    TF_LITE_ENSURE_OK(
-        context, CopyOrConvertOutputData(ann_type_equivalent,
-                                         nn_output_memory_->get_data_ptr() +
-                                             output_offset_accumulator,
-                                         tensor));
-    output_offset_accumulator += tensor->bytes;
-    output_offset_accumulator += getNumPaddingBytes(tensor->bytes);
-  }
-
-  // sync state tensors from device memories
-  if (use_device_memory_for_state_tensors &&
-      delegate_data.sync_states_from_device) {
-    for (auto& [tfl_index, info] : nn_state_tensor_info_map_) {
-      TfLiteTensor* tensor = &context->tensors[tfl_index];
-      if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
-          tensor->buffer_handle < tensor_memory_map_->size()) {
-        RETURN_TFLITE_ERROR_IF_NN_ERROR(
-            context,
-            nnapi_->ANeuralNetworksMemory_copy(
-                info.nn_output_memory_handle.get(),
-                tensor_memory_map_->at(tensor->buffer_handle).memory),
-            "syncing device memory from device", nnapi_errno);
-      } else {
-        // For pointer tensor data, we need to copy twice:
-        // 1. device memory -> shared memory
-        // 2. shared memory -> raw pointer
-        // The second copy may also need type conversion from uint8 -> int8.
-        RETURN_TFLITE_ERROR_IF_NN_ERROR(context,
-                                        nnapi_->ANeuralNetworksMemory_copy(
-                                            info.nn_output_memory_handle.get(),
-                                            info.nn_temp_buffer->get_handle()),
-                                        "syncing device memory from device",
-                                        nnapi_errno);
-        const TfLiteType ann_type_equivalent =
-            operand_mapping_.lite_index_to_ann_type_conversion(tfl_index);
-        TF_LITE_ENSURE_OK(context,
-                          CopyOrConvertOutputData(
-                              ann_type_equivalent,
-                              info.nn_temp_buffer->get_data_ptr(), tensor));
+    if (tensor->type == kTfLiteInt8 && ann_type_equivalent == kTfLiteUInt8) {
+      // Explicitly convert uint8 values to int8 values.
+      uint8_t* output_ptr = reinterpret_cast<uint8_t*>(
+          nn_output_memory_->get_data_ptr() + output_offset);
+      const auto num_elements = NumElements(tensor);
+      for (int i = 0; i < num_elements; ++i) {
+        output_ptr[i] =
+            static_cast<uint8_t>(static_cast<int32_t>(output_ptr[i]) - 128);
       }
     }
-  }
-
-  // swap device memory handles so that the state output of the current
-  // invocation will be used as the state input of the next invocation
-  if (use_device_memory_for_state_tensors) {
-    for (auto& [tfl_index, info] : nn_state_tensor_info_map_) {
-      std::swap(info.nn_input_memory_handle, info.nn_output_memory_handle);
-    }
+    memcpy(tensor->data.raw, nn_output_memory_->get_data_ptr() + output_offset,
+           tensor->bytes);
+    output_offset += tensor->bytes;
+    output_offset += getNumPaddingBytes(tensor->bytes);
   }
 
   // copy output of all output tensors in feedback_loops_ into the
   // associated input
-  for (auto [output_tensor_idx, input_tensor_idx] : feedback_loops_) {
+  for (auto feedback_loop : feedback_loops_) {
+    int output_tensor_idx;
+    int input_tensor_idx;
+    std::tie(output_tensor_idx, input_tensor_idx) = feedback_loop;
     TfLiteTensor& src = context->tensors[output_tensor_idx];
     TfLiteTensor& dest = context->tensors[input_tensor_idx];
+
     memcpy(dest.data.raw, src.data.raw, src.bytes);
   }
 
@@ -4778,17 +4622,6 @@ TfLiteStatus NNAPIDelegateKernel::BuildGraph(
   std::vector<uint32_t> outputs;
   outputs.reserve(output_tensors->size);
 
-  for (int tfl_index : model_state_tfl_inputs_) {
-    NNStateTensorInfo info = {
-        .nn_input_memory_handle =
-            std::unique_ptr<ANeuralNetworksMemory, NNFreeMemory>(
-                nullptr, NNFreeMemory(nnapi_)),
-        .nn_output_memory_handle =
-            std::unique_ptr<ANeuralNetworksMemory, NNFreeMemory>(
-                nullptr, NNFreeMemory(nnapi_))};
-    nn_state_tensor_info_map_.emplace(tfl_index, std::move(info));
-  }
-
   size_t total_input_byte_size = 0;
   // Make the TensorFlow Lite inputs and outputs to ann_indices.
   for (int i : TfLiteIntArrayView(input_tensors)) {
@@ -4798,6 +4631,10 @@ TfLiteStatus NNAPIDelegateKernel::BuildGraph(
         // The delegate might not have mapped this input (this can
         // happen if one tensor is split in several ones)
         operand_mapping_.lite_index_to_ann(i) != -1) {
+      inputs.push_back(operand_mapping_.lite_index_to_ann(i));
+      if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
+        continue;
+      }
       const TfLiteType nn_type_conversion =
           operand_mapping_.lite_index_to_ann_type_conversion(i);
       int tensor_size = 0;
@@ -4809,15 +4646,6 @@ TfLiteStatus NNAPIDelegateKernel::BuildGraph(
             context, GetSizeOfType(context, nn_type_conversion, &type_size));
         tensor_size = NumElements(&context->tensors[i]) * type_size;
       }
-      if (auto it = nn_state_tensor_info_map_.find(i);
-          it != nn_state_tensor_info_map_.end()) {
-        it->second.nn_input_index = inputs.size();
-        it->second.tensor_size = tensor_size;
-      }
-      inputs.push_back(operand_mapping_.lite_index_to_ann(i));
-      if (context->tensors[i].buffer_handle != kTfLiteNullBufferHandle) {
-        continue;
-      }
       total_input_byte_size += tensor_size;
       total_input_byte_size += getNumPaddingBytes(tensor_size);
     }
@@ -4838,11 +4666,8 @@ TfLiteStatus NNAPIDelegateKernel::BuildGraph(
   }
 
   // Add state output tensors as model outputs.
-  for (int i = 0; i < model_state_outputs_.size(); i++) {
-    const int tfl_index = model_state_tfl_inputs_[i];
-    const int nn_model_index = model_state_outputs_[i];
-    nn_state_tensor_info_map_.at(tfl_index).nn_output_index = outputs.size();
-    outputs.push_back(nn_model_index);
+  for (int i : model_state_outputs_) {
+    outputs.push_back(i);
   }
 
   // Tell ANN to declare inputs/outputs
@@ -4947,8 +4772,6 @@ StatefulNnApiDelegate::StatefulNnApiDelegate(const NnApi* nnapi,
   if (nnapi->android_sdk_version >= kMinSdkVersionForNNAPI11) {
     delegate_data_.allow_dynamic_dimensions = options.allow_dynamic_dimensions;
   }
-  delegate_data_.use_device_memory_for_state_tensors =
-      options.use_device_memory_for_state_tensors;
   TFLITE_LOG_PROD_ONCE(tflite::TFLITE_LOG_INFO,
                        "Created TensorFlow Lite delegate for NNAPI.");
   Prepare = DoPrepare;
@@ -4991,17 +4814,9 @@ const StatefulNnApiDelegate::Options StatefulNnApiDelegate::GetOptions(
   options.max_execution_loop_timeout_duration_ns =
       delegate_data->max_execution_loop_timeout_duration_ns;
   options.allow_dynamic_dimensions = delegate_data->allow_dynamic_dimensions;
-  options.use_device_memory_for_state_tensors =
-      delegate_data->use_device_memory_for_state_tensors;
   return options;
 }
 
-const StatefulNnApiDelegate::Data& StatefulNnApiDelegate::GetData(
-    TfLiteDelegate* delegate) {
-  auto* delegate_data = reinterpret_cast<Data*>(delegate->data_);
-  return *delegate_data;
-}
-
 const std::vector<StatefulNnApiDelegate::MemoryRegistration>&
 StatefulNnApiDelegate::GetTensorMemoryMap(TfLiteDelegate* delegate) {
   auto delegate_data = reinterpret_cast<Data*>(delegate->data_);
@@ -5062,24 +4877,6 @@ int StatefulNnApiDelegate::GetNnApiErrno() const {
   return delegate_data_.nnapi_errno;
 }
 
-TfLiteStatus StatefulNnApiDelegate::SetSyncStatesToDevice(
-    bool sync_states_to_device) {
-  if (!delegate_data_.use_device_memory_for_state_tensors) {
-    return kTfLiteError;
-  }
-  delegate_data_.sync_states_to_device = sync_states_to_device;
-  return kTfLiteOk;
-}
-
-TfLiteStatus StatefulNnApiDelegate::SetSyncStatesFromDevice(
-    bool sync_states_from_device) {
-  if (!delegate_data_.use_device_memory_for_state_tensors) {
-    return kTfLiteError;
-  }
-  delegate_data_.sync_states_from_device = sync_states_from_device;
-  return kTfLiteOk;
-}
-
 // static
 TfLiteStatus StatefulNnApiDelegate::GetNodesSupportedByAccelerator(
     TfLiteContext* context, TfLiteDelegate* delegate, const NnApi* nnapi,
@@ -5111,9 +4908,9 @@ TfLiteStatus StatefulNnApiDelegate::GetNodesSupportedByAccelerator(
                                    supported_partition_nodes.begin(),
                                    supported_partition_nodes.end());
 
-    bool single_partition_delegated = (supported_partition_nodes.size() ==
-                                       partition_params.nodes_to_replace->size);
-    if (single_partition_delegated) {
+    bool model_fully_supported = (supported_partition_nodes.size() ==
+                                  partition_params.nodes_to_replace->size);
+    if (model_fully_supported) {
       delegate_data->CacheDelegateKernel(&partition_params,
                                          kernel_state.release());
     }
@@ -5328,10 +5125,6 @@ TfLiteStatus StatefulNnApiDelegate::DoPrepare(TfLiteContext* context,
                                    params_array, params_array + num_partitions),
                                &nodes_to_delegate));
 
-  if (!nodes_to_delegate.empty() && num_partitions == 1) {
-    delegate_data->single_partition_delegated = true;
-  }
-
   if (nodes_to_delegate.empty()) {
     return kTfLiteOk;
   } else {
diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.h b/tensorflow/lite/delegates/nnapi/nnapi_delegate.h
index dbc92f7d5a4..4b12b0d0d18 100644
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.h
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.h
@@ -125,33 +125,6 @@ class StatefulNnApiDelegate : public TfLiteDelegate {
     // accelerator. This should only be enabled if the target device supports
     // dynamic dimensions of the model.
     bool allow_dynamic_dimensions = false;
-
-    // When set to true, the delegate will allocate device memory for state
-    // tensors to reduce data copying and transformation overhead. In such a
-    // case, the user must explicitly specify whether they would like to sync
-    // states between host and device before and after each invocation by
-    // SetSyncStatesToDevice and SetSyncStatesFromDevice. The following code
-    // snippet demonstrates the usage:
-    //
-    //   StatefulNnapiDelegate::Options options;
-    //   options.use_device_memory_for_state_tensors = true;
-    //   ...
-    //
-    //   for (int i = 0; i < sequence_size; i++) {
-    //     ...
-    //
-    //     // Push initial states to the device before the first invocation.
-    //     delegate->SetSyncStatesToDevice(i == 0);
-    //
-    //     // Get states data back to the host CPU buffer after the final
-    //     // invocation.
-    //     delegate->SetSyncStatesFromDevice(i == sequence_size - 1);
-    //
-    //     interpreter->Invoke();
-    //   }
-    //
-    // WARNING: This is an experimental interface that is subject to change.
-    bool use_device_memory_for_state_tensors = false;
   };
 
   // Uses default options.
@@ -213,23 +186,7 @@ class StatefulNnApiDelegate : public TfLiteDelegate {
   // (i.e. when calling interpreter.ModifyGraphWithDelegate(delegate)).
   int GetNnApiErrno() const;
 
-  // Specifies whether the device memories should be initialized from the
-  // content of CPU buffers of state tensors before the execution or not.
-  // Will return an error if the delegate is not initialized with
-  // use_device_memory_for_state_tensors set to true.
-  // WARNING: This is an experimental interface that is subject to change.
-  TfLiteStatus SetSyncStatesToDevice(bool sync_states_to_device);
-
-  // Specifies whether the device memories should be copied to the content of
-  // CPU buffers of state tensors after the execution or not.
-  // Will return an error if the delegate is not initialized with
-  // use_device_memory_for_state_tensors set to true.
-  // WARNING: This is an experimental interface that is subject to change.
-  TfLiteStatus SetSyncStatesFromDevice(bool sync_states_from_device);
-
  private:
-  friend NNAPIDelegateKernel;
-
   // Encapsulates all delegate data.
   struct Data {
     // Pointer to NNAPI implementation to be used by this delegate as
@@ -278,17 +235,6 @@ class StatefulNnApiDelegate : public TfLiteDelegate {
     uint64_t max_execution_loop_timeout_duration_ns = 0;
     // Whether to allow dynamic dimension sizes without re-compilation.
     bool allow_dynamic_dimensions = false;
-    // When set to true, the delegate will allocate device memories for state
-    // tensors to reduce data copying and transformation overhead.
-    bool use_device_memory_for_state_tensors = false;
-    // When set to true, the device memories will be initialized from the
-    // content of CPU buffers of state tensors before the execution.
-    bool sync_states_to_device = false;
-    // When set to true, the device memories will be copied to the content of
-    // CPU buffers of state tensors after the execution.
-    bool sync_states_from_device = false;
-    // Whether the model is fully supported by the delegate.
-    bool single_partition_delegated = false;
 
     explicit Data(const NnApi* nnapi);
     ~Data();
@@ -302,9 +248,6 @@ class StatefulNnApiDelegate : public TfLiteDelegate {
         const TfLiteDelegateParams* delegate_params);
   };
 
-  // Returns the delegate data.
-  static const Data& GetData(TfLiteDelegate* delegate);
-
   // Implements TfLiteDelegate::Prepare. Please refer to TFLiteDelegate
   // documentation for more info.
   static TfLiteStatus DoPrepare(TfLiteContext* context,
diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h b/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h
index 60c32a1ef0f..36c1dd32efb 100644
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_kernel.h
@@ -22,7 +22,6 @@ limitations under the License.
 #include "tensorflow/lite/allocation.h"
 #include "tensorflow/lite/c/common.h"
 #include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
-#include "tensorflow/lite/nnapi/NeuralNetworksTypes.h"
 #include "tensorflow/lite/nnapi/nnapi_implementation.h"
 
 namespace tflite {
@@ -155,18 +154,6 @@ class NNFreeExecution {
   // NnApi instance to use. Not owned by this object.
   const NnApi* nnapi_;
 };
-// RAII NN API Memory Destructor for use with std::unique_ptr
-class NNFreeMemory {
- public:
-  explicit NNFreeMemory(const NnApi* nnapi) : nnapi_(nnapi) {}
-  void operator()(ANeuralNetworksMemory* memory) {
-    nnapi_->ANeuralNetworksMemory_free(memory);
-  }
-
- private:
-  // NnApi instance to use. Not owned by this object.
-  const NnApi* nnapi_;
-};
 
 // Manage NNAPI shared memory handle
 class NNMemory {
@@ -188,19 +175,6 @@ class NNMemory {
   ANeuralNetworksMemory* nn_memory_handle_ = nullptr;
 };
 
-// Basic info and NN device memory handles for state tensors.
-struct NNStateTensorInfo {
-  uint32_t nn_input_index = 0;
-  uint32_t nn_output_index = 0;
-  // The size of the NN state tensor after applying any potential data type
-  // conversion.
-  int tensor_size = 0;
-  std::unique_ptr<ANeuralNetworksMemory, NNFreeMemory> nn_input_memory_handle;
-  std::unique_ptr<ANeuralNetworksMemory, NNFreeMemory> nn_output_memory_handle;
-  // The shared memory used to sync the state from the device.
-  std::unique_ptr<NNMemory> nn_temp_buffer;
-};
-
 
 enum class NNAPIValidationFailureType : int {
   // The operator is not supported by either NNAPI or the NNAPI Delegate.
@@ -366,9 +340,6 @@ class NNAPIDelegateKernel {
   // data available for TFLite model users
   std::vector<std::tuple<int, int>> feedback_loops_;
 
-  // TfLite index -> state tensor info.
-  std::map<int, NNStateTensorInfo> nn_state_tensor_info_map_;
-
   std::unique_ptr<NNMemory> nn_input_memory_;
   std::unique_ptr<NNMemory> nn_output_memory_;
 
diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc
index c1a3923de4d..16e7a260961 100644
--- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc
+++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc
@@ -2718,15 +2718,24 @@ class RNNOpModel : public SingleOpModelWithNNAPI {
  public:
   RNNOpModel(int batches, int units, int size,
              const TensorType weights = TensorType_FLOAT32,
-             const TensorType recurrent_weights = TensorType_FLOAT32) {
-    Init(batches, units, size, weights, recurrent_weights);
-  }
-
-  RNNOpModel(const StatefulNnApiDelegate::Options& options, int batches,
-             int units, int size, const TensorType weights = TensorType_FLOAT32,
              const TensorType recurrent_weights = TensorType_FLOAT32)
-      : SingleOpModelWithNNAPI(options) {
-    Init(batches, units, size, weights, recurrent_weights);
+      : batches_(batches), units_(units), input_size_(size) {
+    input_ = AddInput(TensorType_FLOAT32);
+    weights_ = AddInput(weights);
+    recurrent_weights_ = AddInput(recurrent_weights);
+    bias_ = AddInput(TensorType_FLOAT32);
+    hidden_state_ = AddVariableInput(TensorType_FLOAT32);
+    output_ = AddOutput(TensorType_FLOAT32);
+    SetBuiltinOp(
+        BuiltinOperator_RNN, BuiltinOptions_RNNOptions,
+        CreateRNNOptions(builder_, ActivationFunctionType_RELU).Union());
+    BuildInterpreterWithNNAPI({
+        {batches_, input_size_},  // input tensor
+        {units_, input_size_},    // weights tensor
+        {units_, units_},         // recurrent weights tensor
+        {units_},                 // bias tensor
+        {batches_, units_}        // hidden state tensor
+    });
   }
 
   void SetBias(std::initializer_list<float> f) { PopulateTensor(bias_, f); }
@@ -2747,16 +2756,8 @@ class RNNOpModel : public SingleOpModelWithNNAPI {
     PopulateTensor(input_, offset, begin, end);
   }
 
-  void SetHiddenState(const std::vector<float>& data) {
-    PopulateTensor(hidden_state_, data);
-  }
-
   std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
 
-  std::vector<float> GetHiddenState() {
-    return ExtractVector<float>(hidden_state_);
-  }
-
   int input_size() { return input_size_; }
   int num_units() { return units_; }
   int num_batches() { return batches_; }
@@ -2772,50 +2773,8 @@ class RNNOpModel : public SingleOpModelWithNNAPI {
   int batches_;
   int units_;
   int input_size_;
-
- private:
-  // Performs initialization logic shared across all constructors.
-  void Init(int batches, int units, int size, const TensorType weights,
-            const TensorType recurrent_weights) {
-    batches_ = batches;
-    units_ = units;
-    input_size_ = size;
-    input_ = AddInput(TensorType_FLOAT32);
-    weights_ = AddInput(weights);
-    recurrent_weights_ = AddInput(recurrent_weights);
-    bias_ = AddInput(TensorType_FLOAT32);
-    hidden_state_ = AddVariableInput(TensorType_FLOAT32);
-    output_ = AddOutput(TensorType_FLOAT32);
-    SetBuiltinOp(
-        BuiltinOperator_RNN, BuiltinOptions_RNNOptions,
-        CreateRNNOptions(builder_, ActivationFunctionType_RELU).Union());
-    BuildInterpreterWithNNAPI({
-        {batches_, input_size_},  // input tensor
-        {units_, input_size_},    // weights tensor
-        {units_, units_},         // recurrent weights tensor
-        {units_},                 // bias tensor
-        {batches_, units_}        // hidden state tensor
-    });
-  }
 };
 
-static void InvokeAndTestSingleRnnStep(int step_index, RNNOpModel* rnn) {
-  float* batch_start = rnn_input + step_index * rnn->input_size();
-  float* batch_end = batch_start + rnn->input_size();
-  rnn->SetInput(0, batch_start, batch_end);
-  rnn->SetInput(rnn->input_size(), batch_start, batch_end);
-
-  rnn->Invoke();
-
-  float* golden_start = rnn_golden_output + step_index * rnn->num_units();
-  float* golden_end = golden_start + rnn->num_units();
-  std::vector<float> expected;
-  expected.insert(expected.end(), golden_start, golden_end);
-  expected.insert(expected.end(), golden_start, golden_end);
-
-  EXPECT_THAT(rnn->GetOutput(), ElementsAreArray(ArrayFloatNear(expected)));
-}
-
 TEST(NNAPIDelegate, RnnBlackBoxTest) {
   RNNOpModel rnn(2, 16, 8);
   rnn.SetWeights(rnn_weights);
@@ -2826,66 +2785,20 @@ TEST(NNAPIDelegate, RnnBlackBoxTest) {
                                   (rnn.input_size() * rnn.num_batches());
 
   for (int i = 0; i < input_sequence_size; i++) {
-    InvokeAndTestSingleRnnStep(i, &rnn);
-  }
-}
+    float* batch_start = rnn_input + i * rnn.input_size();
+    float* batch_end = batch_start + rnn.input_size();
+    rnn.SetInput(0, batch_start, batch_end);
+    rnn.SetInput(rnn.input_size(), batch_start, batch_end);
 
-TEST(NNAPIDelegate, RnnDeviceMemoryBasicTest) {
-  StatefulNnApiDelegate::Options options;
-  options.use_device_memory_for_state_tensors = true;
+    rnn.Invoke();
 
-  RNNOpModel rnn(options, 2, 16, 8);
-  rnn.SetWeights(rnn_weights);
-  rnn.SetBias(rnn_bias);
-  rnn.SetRecurrentWeights(rnn_recurrent_weights);
+    float* golden_start = rnn_golden_output + i * rnn.num_units();
+    float* golden_end = golden_start + rnn.num_units();
+    std::vector<float> expected;
+    expected.insert(expected.end(), golden_start, golden_end);
+    expected.insert(expected.end(), golden_start, golden_end);
 
-  auto* delegate = rnn.GetDelegate();
-  const int input_sequence_size = sizeof(rnn_input) / sizeof(float) /
-                                  (rnn.input_size() * rnn.num_batches());
-
-  // Only sync the state to device in the first invocation, all subsequent
-  // states are kept inside the driver.
-  for (int i = 0; i < input_sequence_size; i++) {
-    delegate->SetSyncStatesToDevice(i == 0);
-    InvokeAndTestSingleRnnStep(i, &rnn);
-  }
-}
-
-TEST(NNAPIDelegate, RnnDeviceMemorySyncTest) {
-  StatefulNnApiDelegate::Options options;
-  options.use_device_memory_for_state_tensors = true;
-
-  RNNOpModel rnn(options, 2, 16, 8);
-  rnn.SetWeights(rnn_weights);
-  rnn.SetBias(rnn_bias);
-  rnn.SetRecurrentWeights(rnn_recurrent_weights);
-
-  auto* delegate = rnn.GetDelegate();
-  const int input_sequence_size = sizeof(rnn_input) / sizeof(float) /
-                                  (rnn.input_size() * rnn.num_batches());
-  const int sync_output_index = input_sequence_size / 2;
-
-  // The following steps test SetSyncStatesFromDevice and SetSyncStatesToDevice:
-  // 1. Invoke RNN sequence until sync_output_index;
-  // 2. Extract the hidden output state at sync_output_index by
-  //    SetSyncStatesFromDevice(true);
-  // 3. Continue RNN sequence until the end;
-  // 4. Reset the hidden state by SetSyncStatesToDevice(true), the state should
-  //    go back to sync_output_index;
-  // 5. Continue RNN sequence from sync_output_index + 1 until the end.
-  std::vector<float> hidden_state_data;
-  for (int i = 0; i < input_sequence_size; i++) {
-    delegate->SetSyncStatesToDevice(i == 0);
-    delegate->SetSyncStatesFromDevice(i == sync_output_index);
-    InvokeAndTestSingleRnnStep(i, &rnn);
-    if (i == sync_output_index) {
-      hidden_state_data = rnn.GetHiddenState();
-    }
-  }
-  rnn.SetHiddenState(hidden_state_data);
-  for (int i = sync_output_index + 1; i < input_sequence_size; i++) {
-    delegate->SetSyncStatesToDevice(i == (sync_output_index + 1));
-    InvokeAndTestSingleRnnStep(i, &rnn);
+    EXPECT_THAT(rnn.GetOutput(), ElementsAreArray(ArrayFloatNear(expected)));
   }
 }
 

From 9dc5df5f0f2bc1b75e4672b317a826b8d780e10c Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 11 Dec 2020 17:42:58 -0800
Subject: [PATCH 11/60] [XLA] Add creation_pass_id and dummy op_names to
 OpMetadata.

PiperOrigin-RevId: 347108311
Change-Id: Iaeb7cea0c049e5f538557b1103972705e1a3f0de
---
 tensorflow/compiler/xla/client/xla_builder.cc |  1 +
 .../gpu/cudnn_fused_conv_rewriter_test.cc     |  8 ++---
 .../compiler/xla/service/hlo_computation.cc   |  6 +++-
 .../compiler/xla/service/hlo_instruction.h    | 15 +++++++--
 .../xla/service/hlo_module_metadata.h         |  6 ++++
 .../compiler/xla/service/hlo_pass_pipeline.cc | 31 +++++++++++++++++--
 .../xla/service/mlir_gpu/tests/broken_add.hlo |  4 +--
 .../compiler/xla/tests/hlo_metadata_test.cc   |  8 +++--
 .../xla/tests/llvm_irgen_test_base.cc         | 15 +++++++++
 tensorflow/compiler/xla/xla_data.proto        |  9 ++++++
 10 files changed, 88 insertions(+), 15 deletions(-)

diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc
index 821710ed2a4..3f582c73f96 100644
--- a/tensorflow/compiler/xla/client/xla_builder.cc
+++ b/tensorflow/compiler/xla/client/xla_builder.cc
@@ -144,6 +144,7 @@ bool InstrIsSetBound(const HloInstructionProto* instr_proto) {
   }
   return false;
 }
+
 }  // namespace
 
 namespace internal {
diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter_test.cc b/tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter_test.cc
index bd6aa6e715a..afaaa803464 100644
--- a/tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter_test.cc
+++ b/tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter_test.cc
@@ -324,7 +324,7 @@ TEST_F(CudnnFusedConvRewriterTest, PreservesMetadata) {
       input = f32[1,17,9,9] parameter(0)
       filter = f32[3,3,17,32] parameter(1)
 
-      conv = f32[1,32,9,9] convolution(input, filter), window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_01io->bf01, feature_group_count=1, metadata={op_type="foo"}
+      conv = f32[1,32,9,9] convolution(input, filter), window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_01io->bf01, feature_group_count=1, metadata={op_type="foo" op_name="bar"}
       ROOT relu = f32[1,32,9,9] maximum(zeros, conv)
     })";
 
@@ -337,9 +337,9 @@ TEST_F(CudnnFusedConvRewriterTest, PreservesMetadata) {
               backend().default_stream_executor(), backend().memory_allocator())
           .ConsumeValueOrDie()
           ->ToString();
-  EXPECT_THAT(
-      optimized_hlo_string,
-      ::testing::ContainsRegex(R"(custom-call.*metadata=\{op_type="foo"\})"));
+  EXPECT_THAT(optimized_hlo_string,
+              ::testing::ContainsRegex(
+                  R"(custom-call.*metadata=\{op_type="foo" op_name="bar"\})"));
 }
 
 TEST_F(CudnnFusedConvRewriterTest, TestPreservesFeatureGroupCount) {
diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index 6323d0903a4..b3de1f0de01 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -918,7 +918,11 @@ Status HloComputation::ReplaceInstruction(HloInstruction* old_instruction,
   // function, and that they would be correlated to the same TF op. This might
   // not always be correct since HLO optimizations can cross TF op boundaries.
   // But still this seems to be better than nothing.
-  if (new_instruction->metadata().op_name().empty()) {
+  bool overwrite_dummy_name =
+      absl::StartsWith(new_instruction->metadata().op_name(), "DUMMY") &&
+      !old_instruction->metadata().op_name().empty() &&
+      !absl::StartsWith(old_instruction->metadata().op_name(), "DUMMY");
+  if (new_instruction->metadata().op_name().empty() || overwrite_dummy_name) {
     new_instruction->set_metadata(old_instruction->metadata());
   }
   if (new_instruction->frontend_attributes().map().empty()) {
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index c6f6919db1b..98c29af31d8 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -1601,8 +1601,19 @@ class HloInstruction {
   const PrecisionConfig& precision_config() const;
   PrecisionConfig* mutable_precision_config();
 
-  // Sets the debug metadata for this instruction.
-  void set_metadata(const OpMetadata& metadata) { metadata_ = metadata; }
+  // Sets the debug metadata for this instruction, excluding creation_pass_id,
+  // which should never be copied anywhere.
+  void set_metadata(const OpMetadata& metadata) {
+    int64 creation_pass_id = metadata_.creation_pass_id();
+    metadata_ = metadata;
+    metadata_.set_creation_pass_id(creation_pass_id);
+  }
+  void set_creation_pass_id(int64 pass_id) {
+    metadata_.set_creation_pass_id(pass_id);
+  }
+  void set_metadata_op_name(const std::string& name) {
+    metadata_.set_op_name(name);
+  }
   const OpMetadata& metadata() const { return metadata_; }
 
   // Set/get the computation containing this instruction. set_parent should only
diff --git a/tensorflow/compiler/xla/service/hlo_module_metadata.h b/tensorflow/compiler/xla/service/hlo_module_metadata.h
index 434e3bb0a26..fcb7871f4c8 100644
--- a/tensorflow/compiler/xla/service/hlo_module_metadata.h
+++ b/tensorflow/compiler/xla/service/hlo_module_metadata.h
@@ -61,6 +61,12 @@ class HloModuleMetadata {
     module_metadata_.add_partitioned_module_ids(id);
   }
 
+  StatusOr<int64> current_pass_id() {
+    TF_ASSIGN_OR_RETURN(HloPassMetadata * pass_metadata,
+                        GetCurrentHloPassMetadata());
+    return pass_metadata->pass_id();
+  }
+
   // Setters for the current HloPassMetadata.
   Status set_current_pass_name(const std::string& pass_name) {
     return MutateCurrentHloPassMetadata(
diff --git a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
index 41f907fc85e..6f25cb2e2f9 100644
--- a/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
+++ b/tensorflow/compiler/xla/service/hlo_pass_pipeline.cc
@@ -67,7 +67,7 @@ void RecordPassEndMetadata(HloModule& module, const std::string& pass_name,
   Status status =
       AttemptRecordPassEndMetadata(module, pass_name, module_changed);
   if (!status.ok()) {
-    LOG(FATAL) << status.error_message();
+    LOG(FATAL) << status;
   }
 }
 
@@ -91,7 +91,30 @@ void RecordPassEndMetadata(HloModuleGroup& module_group,
   Status status =
       AttemptRecordPassEndMetadata(module_group, pass_name, module_changed);
   if (!status.ok()) {
-    LOG(FATAL) << status.error_message();
+    LOG(FATAL) << status;
+  }
+}
+
+void SetInstructionMetadata(HloModule& module) {
+  StatusOr<int64> pass_id = module.metadata()->current_pass_id();
+  if (!pass_id.ok()) {
+    LOG(FATAL) << pass_id.status();
+  }
+  for (xla::HloComputation* computation : module.computations()) {
+    for (xla::HloInstruction* instruction : computation->instructions()) {
+      if (instruction->metadata().creation_pass_id() == 0) {
+        instruction->set_creation_pass_id(*pass_id);
+      }
+      if (instruction->metadata().op_name().empty()) {
+        instruction->set_metadata_op_name(absl::StrCat("DUMMY_", *pass_id));
+      }
+    }
+  }
+}
+
+void SetInstructionMetadata(HloModuleGroup& module_group) {
+  for (HloModule* module : module_group.modules()) {
+    SetInstructionMetadata(*module);
   }
 }
 
@@ -127,6 +150,7 @@ StatusOr<bool> HloPassPipeline::RunPassesInternal(
   TF_RETURN_IF_ERROR(RunInvariantCheckers(hlo, kPipelineStart));
 
   RecordPassStartMetadata(*hlo, std::string(kPipelineStart), pipeline_name);
+  SetInstructionMetadata(*hlo);
   MaybeDumpHloAndSaveFilenames(*hlo,
                                /*after_pass_name=*/kPipelineStart,
                                /*before_pass_name=*/passes.empty()
@@ -147,6 +171,7 @@ StatusOr<bool> HloPassPipeline::RunPassesInternal(
     }
     RecordPassStartMetadata(*hlo, pass_name, pipeline_name);
     TF_ASSIGN_OR_RETURN(bool pass_changed, RunHelper(pass, hlo));
+    SetInstructionMetadata(*hlo);
     MaybeDumpHloAndSaveFilenames(*hlo,
                                  /*after_pass_name=*/pass_name,
                                  /*before_pass_name=*/i + 1 >= passes.size()
@@ -216,7 +241,7 @@ void HloPassPipeline::MaybeDumpHloAndSaveFilenames(
            name(), before_pass_name, after_pass_name, module)) {
     Status status = module.metadata()->add_current_pass_dump_filename(filename);
     if (!status.ok()) {
-      LOG(FATAL) << status.error_message();
+      LOG(FATAL) << status;
     }
   }
 }
diff --git a/tensorflow/compiler/xla/service/mlir_gpu/tests/broken_add.hlo b/tensorflow/compiler/xla/service/mlir_gpu/tests/broken_add.hlo
index 6bbddb61a74..8feda522430 100644
--- a/tensorflow/compiler/xla/service/mlir_gpu/tests/broken_add.hlo
+++ b/tensorflow/compiler/xla/service/mlir_gpu/tests/broken_add.hlo
@@ -4,7 +4,7 @@ HloModule Add
 ENTRY %Add (x: f32[2,2,2], y: f32[2,2,2]) -> f32[2,2,2] {
   %x = f32[2,2,2]{2,1,0} parameter(0)
   %y = f32[2,2,2]{2,1,0} parameter(1)
-  ROOT %add = f32[2,2,2]{2,1,0} add(f32[2,2,2]{2,1,0} %x, f32[2,2,2]{2,1,0} %y)
+  ROOT %add = f32[2,2,2]{2,1,0} add(f32[2,2,2]{2,1,0} %x, f32[2,2,2]{2,1,0} %y), metadata={op_name="original_tf_op"}
 }
 
-// CHECK: ERRORS FOUND: [%add = f32[2,2,2]{2,1,0} add(f32[2,2,2]{2,1,0} %x, f32[2,2,2]{2,1,0} %y): failed for testing: lmhlo.add; failed for testing: std.return]
+// CHECK: ERRORS FOUND: [%add = f32[2,2,2]{2,1,0} add(f32[2,2,2]{2,1,0} %x, f32[2,2,2]{2,1,0} %y), metadata={op_name="original_tf_op"}: failed for testing: lmhlo.add; failed for testing: std.return]
diff --git a/tensorflow/compiler/xla/tests/hlo_metadata_test.cc b/tensorflow/compiler/xla/tests/hlo_metadata_test.cc
index 1868159ef7b..9b397dc7299 100644
--- a/tensorflow/compiler/xla/tests/hlo_metadata_test.cc
+++ b/tensorflow/compiler/xla/tests/hlo_metadata_test.cc
@@ -22,6 +22,9 @@ limitations under the License.
 namespace xla {
 namespace {
 
+using ::testing::StartsWith;
+using ::testing::StrEq;
+
 class HloMetadataTest : public LocalClientTestBase {
  protected:
   HloMetadataTest() {
@@ -79,9 +82,8 @@ TEST_F(HloMetadataTest, MetadataClearing) {
                          ->module()
                          .entry_computation()
                          ->root_instruction();
-  // We expect these to be empty (no metadata set).
-  EXPECT_EQ("", instruction->metadata().op_type());
-  EXPECT_EQ("", instruction->metadata().op_name());
+  EXPECT_THAT(instruction->metadata().op_type(), StrEq(""));
+  EXPECT_THAT(instruction->metadata().op_name(), StartsWith("DUMMY"));
 }
 
 }  // namespace
diff --git a/tensorflow/compiler/xla/tests/llvm_irgen_test_base.cc b/tensorflow/compiler/xla/tests/llvm_irgen_test_base.cc
index d10d54dab1c..b4d8d3c8716 100644
--- a/tensorflow/compiler/xla/tests/llvm_irgen_test_base.cc
+++ b/tensorflow/compiler/xla/tests/llvm_irgen_test_base.cc
@@ -25,6 +25,20 @@ limitations under the License.
 
 namespace xla {
 
+namespace {
+
+void RemoveDummyMetadataNames(HloModule* module) {
+  for (xla::HloComputation* computation : module->computations()) {
+    for (xla::HloInstruction* instruction : computation->instructions()) {
+      if (absl::StartsWith(instruction->metadata().op_name(), "DUMMY")) {
+        instruction->set_metadata_op_name("");
+      }
+    }
+  }
+}
+
+}  // namespace
+
 void LlvmIrGenTestBase::SetIrHook(bool match_optimized_ir) {
   auto llvm_compiler = GetLLVMCompiler();
   using std::placeholders::_1;
@@ -88,6 +102,7 @@ void LlvmIrGenTestBase::MatchOptimizedHlo(absl::string_view hlo,
                                           bool print_operand_shape) {
   TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> optimized_module,
                           GetOptimizedModule(hlo));
+  RemoveDummyMetadataNames(optimized_module.get());
   HloPrintOptions print_opts;
   print_opts.set_print_operand_shape(print_operand_shape);
   StatusOr<bool> filecheck_result =
diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto
index 11b39be32ad..844be33a0d4 100644
--- a/tensorflow/compiler/xla/xla_data.proto
+++ b/tensorflow/compiler/xla/xla_data.proto
@@ -271,6 +271,10 @@ message OpMetadata {
   //
   // This name is often unique within a computation. Note: some frameworks
   // add auto-generated names if the user does not provide one.
+  //
+  // A dummy name may be assigned if op_name is empty in order to keep track of
+  // where op_name first became empty. Dummy names begin with "DUMMY_" and may
+  // include the current HloPassMetadata.pass_id.
   string op_name = 2;
   // Indicate a file and line that this op is associated to in a user's program.
   //
@@ -279,6 +283,11 @@ message OpMetadata {
   int32 source_line = 4;
 
   repeated ProfileType profile_type = 5;
+
+  // HloPassMetadata.pass_id of the pass that created this HLO instruction
+  // object. Should never be copied between HLO instructions. Zero if unset and
+  // -1 if the instruction was created before HLO passes began.
+  int64 creation_pass_id = 6;
 }
 
 // Profile data from the execution of a computation.

From 110199cd2b22e117fcb4940d2566cf7d66d98059 Mon Sep 17 00:00:00 2001
From: River Riddle <riverriddle@google.com>
Date: Fri, 11 Dec 2020 17:53:04 -0800
Subject: [PATCH 12/60] [mlir][NFC] Replace usages of mlir/IR/StandardTypes.h
 with mlir/IR/BuiltinTypes.h

StandardTypes.h was moved to BuiltinTypes.h and is being removed.

PiperOrigin-RevId: 347109615
Change-Id: I670661f20dd34977d9c9defea687a59de1b1f804
---
 .../mlir/tensorflow/transforms/batchmatmul_to_einsum.cc         | 2 +-
 .../compiler/mlir/tensorflow/transforms/collection_ops_util.cc  | 2 +-
 .../mlir/tensorflow/transforms/decode_attributes_hook.cc        | 2 +-
 .../mlir/tensorflow/transforms/decompose_resource_ops.cc        | 2 +-
 tensorflow/compiler/mlir/tensorflow/transforms/einsum.cc        | 2 +-
 tensorflow/compiler/mlir/tensorflow/transforms/einsum.h         | 2 +-
 .../compiler/mlir/tensorflow/transforms/fold_broadcast.cc       | 2 +-
 tensorflow/compiler/mlir/tensorflow/transforms/fold_switch.cc   | 2 +-
 .../tensorflow/transforms/functional_control_flow_to_regions.cc | 2 +-
 tensorflow/compiler/mlir/tensorflow/transforms/legalize_hlo.cc  | 2 +-
 tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc      | 2 +-
 .../mlir/tensorflow/transforms/optimize_global_tensors.cc       | 2 +-
 .../mlir/tensorflow/transforms/promote_resources_to_args.cc     | 2 +-
 .../tensorflow/transforms/readonly_references_to_resources.cc   | 2 +-
 .../compiler/mlir/tensorflow/transforms/resource_op_lifting.cc  | 2 +-
 .../mlir/tensorflow/transforms/rewrite_tpu_embedding_ops.cc     | 2 +-
 .../compiler/mlir/tensorflow/transforms/shape_inference.cc      | 2 +-
 .../compiler/mlir/tensorflow/transforms/shape_inference_pass.cc | 2 +-
 .../mlir/tensorflow/transforms/stack_ops_decomposition.cc       | 2 +-
 .../tensorflow/transforms/tensor_array_ops_decomposition.cc     | 2 +-
 .../mlir/tensorflow/transforms/tensor_device_copy_conversion.cc | 2 +-
 .../mlir/tensorflow/transforms/tensor_list_ops_decomposition.cc | 2 +-
 .../compiler/mlir/tensorflow/transforms/tf_data_optimization.cc | 2 +-
 .../mlir/tensorflow/transforms/tpu_dynamic_layout_pass.cc       | 2 +-
 .../tensorflow/transforms/tpu_extract_outside_compilation.cc    | 2 +-
 .../compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc     | 2 +-
 .../mlir/tensorflow/transforms/tpu_space_to_depth_pass.cc       | 2 +-
 .../transforms/tpu_update_embedding_enqueue_op_inputs.cc        | 2 +-
 .../compiler/mlir/tensorflow/transforms/unroll_batch_matmul.cc  | 2 +-
 29 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/batchmatmul_to_einsum.cc b/tensorflow/compiler/mlir/tensorflow/transforms/batchmatmul_to_einsum.cc
index 2a607065cdb..99da136db36 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/batchmatmul_to_einsum.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/batchmatmul_to_einsum.cc
@@ -26,9 +26,9 @@ limitations under the License.
 #include "mlir/Analysis/LoopAnalysis.h"  // from @llvm-project
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/Attributes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/OpImplementation.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
 #include "mlir/Support/LLVM.h"  // from @llvm-project
 #include "mlir/Support/LogicalResult.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/collection_ops_util.cc b/tensorflow/compiler/mlir/tensorflow/transforms/collection_ops_util.cc
index 23ab4ff4a19..98f7efecb99 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/collection_ops_util.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/collection_ops_util.cc
@@ -25,9 +25,9 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
 #include "mlir/Support/LLVM.h"  // from @llvm-project
 #include "mlir/Support/LogicalResult.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/decode_attributes_hook.cc b/tensorflow/compiler/mlir/tensorflow/transforms/decode_attributes_hook.cc
index d309c6d379f..09fac6e0706 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/decode_attributes_hook.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/decode_attributes_hook.cc
@@ -18,8 +18,8 @@ limitations under the License.
 #include "llvm/ADT/ArrayRef.h"
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Dialect.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/Support/LogicalResult.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.cc b/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.cc
index 28a5c583919..7701d962554 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.cc
@@ -15,7 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/mlir/tensorflow/transforms/decompose_resource_ops.h"
 
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h"
 
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/einsum.cc b/tensorflow/compiler/mlir/tensorflow/transforms/einsum.cc
index 35e9e90db3a..6277b1d6057 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/einsum.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/einsum.cc
@@ -33,9 +33,9 @@ limitations under the License.
 #include "mlir/Analysis/LoopAnalysis.h"  // from @llvm-project
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/Attributes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/OpImplementation.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
 #include "mlir/Support/LLVM.h"  // from @llvm-project
 #include "mlir/Support/LogicalResult.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/einsum.h b/tensorflow/compiler/mlir/tensorflow/transforms/einsum.h
index 490fe1ee887..65e0528096e 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/einsum.h
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/einsum.h
@@ -24,11 +24,11 @@ limitations under the License.
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/Support/Casting.h"
 #include "mlir/IR/Attributes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/Matchers.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/fold_broadcast.cc b/tensorflow/compiler/mlir/tensorflow/transforms/fold_broadcast.cc
index ce949ef9f5c..b71c9ddced2 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/fold_broadcast.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/fold_broadcast.cc
@@ -21,9 +21,9 @@ limitations under the License.
 #include "llvm/Support/Casting.h"
 #include "mlir/Dialect/Traits.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
 #include "mlir/Support/LogicalResult.h"  // from @llvm-project
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/fold_switch.cc b/tensorflow/compiler/mlir/tensorflow/transforms/fold_switch.cc
index cc24c98a786..f00c00d99ab 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/fold_switch.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/fold_switch.cc
@@ -34,11 +34,11 @@ limitations under the License.
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Block.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
 #include "mlir/IR/OperationSupport.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_regions.cc b/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_regions.cc
index 0e0e874d5bb..1bf12a1b24a 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_regions.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/functional_control_flow_to_regions.cc
@@ -22,8 +22,8 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
 #include "mlir/IR/Verifier.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/legalize_hlo.cc b/tensorflow/compiler/mlir/tensorflow/transforms/legalize_hlo.cc
index 9a13c011974..d7c8506957e 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/legalize_hlo.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/legalize_hlo.cc
@@ -31,12 +31,12 @@ limitations under the License.
 #include "llvm/Support/raw_ostream.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/Attributes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/Matchers.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
 #include "mlir/Support/LLVM.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc
index e48e1553cd7..0f70647b848 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.cc
@@ -20,11 +20,11 @@ limitations under the License.
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
 #include "mlir/IR/Attributes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Diagnostics.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/Matchers.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeRange.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/optimize_global_tensors.cc b/tensorflow/compiler/mlir/tensorflow/transforms/optimize_global_tensors.cc
index 540527cbf6c..e0e0c45d63a 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/optimize_global_tensors.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/optimize_global_tensors.cc
@@ -23,8 +23,8 @@ limitations under the License.
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/SymbolTable.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/Interfaces/CallInterfaces.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/promote_resources_to_args.cc b/tensorflow/compiler/mlir/tensorflow/transforms/promote_resources_to_args.cc
index ca456451407..9f36c2dd943 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/promote_resources_to_args.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/promote_resources_to_args.cc
@@ -59,7 +59,7 @@ limitations under the License.
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/readonly_references_to_resources.cc b/tensorflow/compiler/mlir/tensorflow/transforms/readonly_references_to_resources.cc
index 4e2de3422c7..55f41682ebb 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/readonly_references_to_resources.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/readonly_references_to_resources.cc
@@ -22,7 +22,7 @@ limitations under the License.
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc
index 306ea1d2b3a..bddec8a8e4a 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/resource_op_lifting.cc
@@ -33,10 +33,10 @@ limitations under the License.
 #include "mlir/IR/BlockAndValueMapping.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Diagnostics.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
 #include "mlir/IR/Region.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/SymbolTable.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/rewrite_tpu_embedding_ops.cc b/tensorflow/compiler/mlir/tensorflow/transforms/rewrite_tpu_embedding_ops.cc
index 8dbcf3e500d..c2b8a078057 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/rewrite_tpu_embedding_ops.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/rewrite_tpu_embedding_ops.cc
@@ -15,7 +15,7 @@ limitations under the License.
 
 #include "llvm/ADT/SmallVector.h"
 #include "mlir/IR/Attributes.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
 #include "mlir/Pass/PassRegistry.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc
index 0882ec6dd38..26b1f4cccb4 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.cc
@@ -34,11 +34,11 @@ limitations under the License.
 #include "mlir/IR/Block.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Diagnostics.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
 #include "mlir/IR/OperationSupport.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/SymbolTable.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
 #include "mlir/Interfaces/CallInterfaces.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference_pass.cc
index 36f62a7d3e2..8587c1bc7ed 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference_pass.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference_pass.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
 #include "mlir/Support/LLVM.h"  // from @llvm-project
 #include "mlir/Support/LogicalResult.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/stack_ops_decomposition.cc b/tensorflow/compiler/mlir/tensorflow/transforms/stack_ops_decomposition.cc
index f8c9d40bf40..9e68f9a5411 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/stack_ops_decomposition.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/stack_ops_decomposition.cc
@@ -27,9 +27,9 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/SymbolTable.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_array_ops_decomposition.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_array_ops_decomposition.cc
index d35b159662e..ee21bb84537 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_array_ops_decomposition.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_array_ops_decomposition.cc
@@ -27,9 +27,9 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/SymbolTable.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_device_copy_conversion.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_device_copy_conversion.cc
index d49cb25f408..20ac207e9ad 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_device_copy_conversion.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_device_copy_conversion.cc
@@ -20,8 +20,8 @@ limitations under the License.
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "mlir/Transforms/Passes.h"
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/OperationSupport.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/Pass/PassOptions.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_list_ops_decomposition.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_list_ops_decomposition.cc
index 21baaec36b5..5a2af8dae7f 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/tensor_list_ops_decomposition.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/tensor_list_ops_decomposition.cc
@@ -23,7 +23,7 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
 #include "mlir/Support/LogicalResult.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tf_data_optimization.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tf_data_optimization.cc
index f2321df9823..10d0bcc90b9 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/tf_data_optimization.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/tf_data_optimization.cc
@@ -15,7 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/mlir/tensorflow/transforms/tf_data_optimization.h"
 
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h"
 
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_dynamic_layout_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_dynamic_layout_pass.cc
index 4b61d543c80..39c867c8174 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_dynamic_layout_pass.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_dynamic_layout_pass.cc
@@ -21,11 +21,11 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
 #include "mlir/IR/OperationSupport.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc
index 080b73dfc0d..213919e5785 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_extract_outside_compilation.cc
@@ -25,10 +25,10 @@ limitations under the License.
 #include "llvm/Support/FormatVariadic.h"
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
 #include "mlir/IR/OperationSupport.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeRange.h"  // from @llvm-project
 #include "mlir/IR/Visitors.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc
index edf6bedbb19..ffcc55284f6 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_rewrite_pass.cc
@@ -28,8 +28,8 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
 #include "mlir/Pass/PassRegistry.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_space_to_depth_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_space_to_depth_pass.cc
index 52b3ec4bfd4..80b5e2c6f54 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_space_to_depth_pass.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_space_to_depth_pass.cc
@@ -25,12 +25,12 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
 #include "mlir/IR/OperationSupport.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_update_embedding_enqueue_op_inputs.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_update_embedding_enqueue_op_inputs.cc
index 550c263c444..4328677ab96 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_update_embedding_enqueue_op_inputs.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_update_embedding_enqueue_op_inputs.cc
@@ -19,8 +19,8 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Block.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
 #include "mlir/IR/Visitors.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/unroll_batch_matmul.cc b/tensorflow/compiler/mlir/tensorflow/transforms/unroll_batch_matmul.cc
index 330e76884d6..e4f70a05a1c 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/unroll_batch_matmul.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/unroll_batch_matmul.cc
@@ -27,9 +27,9 @@ limitations under the License.
 #include "mlir/Analysis/LoopAnalysis.h"  // from @llvm-project
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/Attributes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/OpImplementation.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
 #include "mlir/Support/LLVM.h"  // from @llvm-project
 #include "mlir/Support/LogicalResult.h"  // from @llvm-project

From 9cd11ce78fc35ab39a08059d601ee5f582883cef Mon Sep 17 00:00:00 2001
From: River Riddle <riverriddle@google.com>
Date: Fri, 11 Dec 2020 18:12:13 -0800
Subject: [PATCH 13/60] [mlir][NFC] Replace usages of mlir/IR/StandardTypes.h
 with mlir/IR/BuiltinTypes.h

StandardTypes.h was moved to BuiltinTypes.h and is being removed.

PiperOrigin-RevId: 347111862
Change-Id: Ib2776d81f563b2b8d034f7b8cfd6d363a9b38df1
---
 tensorflow/compiler/mlir/tensorflow/ir/tf_attributes.h     | 2 +-
 tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc        | 2 +-
 tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc      | 2 +-
 tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h       | 2 +-
 tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc           | 2 +-
 tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h            | 2 +-
 tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc       | 2 +-
 tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h        | 2 +-
 tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc       | 2 +-
 tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h        | 2 +-
 tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.cc | 2 +-
 tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.h  | 2 +-
 tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc   | 2 +-
 tensorflow/compiler/mlir/tensorflow/ir/tf_structs.h        | 5 ++---
 tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h         | 2 +-
 tensorflow/compiler/mlir/tensorflow/ir/tf_types.cc         | 2 +-
 tensorflow/compiler/mlir/tensorflow/ir/tf_types.h          | 2 +-
 tensorflow/compiler/mlir/tensorflow/ir/tfrt_ops.cc         | 2 +-
 18 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_attributes.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_attributes.h
index c00b9745571..d93cb3c67b6 100644
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_attributes.h
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_attributes.h
@@ -20,8 +20,8 @@ limitations under the License.
 
 #include "llvm/ADT/StringRef.h"
 #include "mlir/IR/BuiltinAttributes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 
 namespace mlir {
 namespace TF {
diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc
index 4253707b3b7..949842070ea 100644
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc
@@ -30,12 +30,12 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinAttributes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/OpDefinition.h"  // from @llvm-project
 #include "mlir/IR/OpImplementation.h"  // from @llvm-project
 #include "mlir/IR/OperationSupport.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/IR/UseDefLists.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc
index 6004f322e83..e24c83e2eaf 100644
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc
@@ -31,13 +31,13 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/DialectImplementation.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/Matchers.h"  // from @llvm-project
 #include "mlir/IR/OpDefinition.h"  // from @llvm-project
 #include "mlir/IR/OpImplementation.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
 #include "mlir/Support/LogicalResult.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h
index 2bc13556b4b..435473634bc 100644
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h
@@ -24,10 +24,10 @@ limitations under the License.
 #include "mlir/Dialect/Traits.h"  // from @llvm-project
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Dialect.h"  // from @llvm-project
 #include "mlir/IR/Matchers.h"  // from @llvm-project
 #include "mlir/IR/OpImplementation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h"
 
 namespace mlir {
diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc
index f673484d536..08ec87eccd6 100644
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.cc
@@ -42,6 +42,7 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Diagnostics.h"  // from @llvm-project
 #include "mlir/IR/DialectImplementation.h"  // from @llvm-project
 #include "mlir/IR/Identifier.h"  // from @llvm-project
@@ -51,7 +52,6 @@ limitations under the License.
 #include "mlir/IR/OpDefinition.h"  // from @llvm-project
 #include "mlir/IR/OpImplementation.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h
index e5da007625e..ebb68d74cd0 100644
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h
@@ -23,10 +23,10 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Dialect.h"  // from @llvm-project
 #include "mlir/IR/Matchers.h"  // from @llvm-project
 #include "mlir/IR/OpImplementation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/Interfaces/CallInterfaces.h"  // from @llvm-project
 #include "mlir/Interfaces/ControlFlowInterfaces.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc
index 95b3f962d6f..513f8338343 100644
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc
@@ -44,6 +44,7 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Diagnostics.h"  // from @llvm-project
 #include "mlir/IR/DialectImplementation.h"  // from @llvm-project
 #include "mlir/IR/Identifier.h"  // from @llvm-project
@@ -53,7 +54,6 @@ limitations under the License.
 #include "mlir/IR/OpDefinition.h"  // from @llvm-project
 #include "mlir/IR/OpImplementation.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h
index fe788ac608e..90cd1c2d621 100644
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h
@@ -20,10 +20,10 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Dialect.h"  // from @llvm-project
 #include "mlir/IR/Matchers.h"  // from @llvm-project
 #include "mlir/IR/OpImplementation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/Interfaces/CallInterfaces.h"  // from @llvm-project
 #include "mlir/Interfaces/ControlFlowInterfaces.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc
index a2cbac6a4d2..5d681295f61 100644
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc
@@ -45,6 +45,7 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Diagnostics.h"  // from @llvm-project
 #include "mlir/IR/DialectImplementation.h"  // from @llvm-project
 #include "mlir/IR/Identifier.h"  // from @llvm-project
@@ -54,7 +55,6 @@ limitations under the License.
 #include "mlir/IR/OpDefinition.h"  // from @llvm-project
 #include "mlir/IR/OpImplementation.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h
index 353c1c6f1d4..eef1b6c2606 100644
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.h
@@ -20,10 +20,10 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Dialect.h"  // from @llvm-project
 #include "mlir/IR/Matchers.h"  // from @llvm-project
 #include "mlir/IR/OpImplementation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/Interfaces/CallInterfaces.h"  // from @llvm-project
 #include "mlir/Interfaces/ControlFlowInterfaces.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.cc
index e5162b0cd5c..70282b571db 100644
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.cc
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.cc
@@ -42,6 +42,7 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Diagnostics.h"  // from @llvm-project
 #include "mlir/IR/DialectImplementation.h"  // from @llvm-project
 #include "mlir/IR/Identifier.h"  // from @llvm-project
@@ -51,7 +52,6 @@ limitations under the License.
 #include "mlir/IR/OpDefinition.h"  // from @llvm-project
 #include "mlir/IR/OpImplementation.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.h
index 01a93e189d2..62caa9c46f5 100644
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.h
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_remaining_ops.h
@@ -20,9 +20,9 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Matchers.h"  // from @llvm-project
 #include "mlir/IR/OpImplementation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/Interfaces/CallInterfaces.h"  // from @llvm-project
 #include "mlir/Interfaces/DerivedAttributeOpInterface.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc
index cc8d5248e0b..85cb8edb8c7 100644
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc
@@ -25,10 +25,10 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Identifier.h"  // from @llvm-project
 #include "mlir/IR/OpImplementation.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/SymbolTable.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/Support/LogicalResult.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_structs.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_structs.h
index b90bf2d47a8..98d2a49bb2e 100644
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_structs.h
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_structs.h
@@ -19,14 +19,13 @@ limitations under the License.
 #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_STRUCTS_H_
 
 #include "llvm/ADT/StringMap.h"
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Diagnostics.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
-#include "tensorflow/core/util/device_name_utils.h"
-
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_structs.h.inc"
+#include "tensorflow/core/util/device_name_utils.h"
 
 namespace mlir {
 namespace TF {
diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h
index aef3c538bc8..5d9013edfa1 100644
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h
@@ -18,8 +18,8 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_TRAITS_H_
 #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_TRAITS_H_
 
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/OpDefinition.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/Interfaces/SideEffectInterfaces.h"  // from @llvm-project
 #include "mlir/Support/LogicalResult.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_types.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_types.cc
index 86369b993be..fd2b18a0492 100644
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_types.cc
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_types.cc
@@ -17,8 +17,8 @@ limitations under the License.
 
 #include "llvm/Support/ErrorHandling.h"
 #include "mlir/Dialect/Traits.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Dialect.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 
 namespace {
diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_types.h b/tensorflow/compiler/mlir/tensorflow/ir/tf_types.h
index 1d3ca0c4a60..52021a28293 100644
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_types.h
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_types.h
@@ -18,10 +18,10 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_TYPES_H_
 #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_TYPES_H_
 
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Diagnostics.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 
diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tfrt_ops.cc b/tensorflow/compiler/mlir/tensorflow/ir/tfrt_ops.cc
index 6a6a7574f29..69a1bf0c22c 100644
--- a/tensorflow/compiler/mlir/tensorflow/ir/tfrt_ops.cc
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tfrt_ops.cc
@@ -15,7 +15,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/mlir/tensorflow/ir/tfrt_ops.h"
 
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 

From 41048da313017d166b7bd9775a3bf51b2326aad2 Mon Sep 17 00:00:00 2001
From: River Riddle <riverriddle@google.com>
Date: Fri, 11 Dec 2020 18:12:14 -0800
Subject: [PATCH 14/60] [mlir][NFC] Replace usages of mlir/IR/StandardTypes.h
 with mlir/IR/BuiltinTypes.h

StandardTypes.h was moved to BuiltinTypes.h and is being removed.

PiperOrigin-RevId: 347111863
Change-Id: If59c759ff321cd041aa707d8d22d60832a15adca
---
 tensorflow/compiler/mlir/hlo/lib/utils/broadcast_utils.cc      | 2 +-
 tensorflow/compiler/mlir/hlo/lib/utils/convert_op_folder.cc    | 2 +-
 .../mlir/lite/experimental/estimators/arithmetic_count_util.h  | 2 +-
 tensorflow/compiler/mlir/lite/ir/tfl_ops.cc                    | 2 +-
 tensorflow/compiler/mlir/lite/ir/tfl_ops.h                     | 2 +-
 .../compiler/mlir/lite/python/saved_model_to_tfl_flatbuffer.cc | 2 +-
 tensorflow/compiler/mlir/python/mlir_wrapper/types.cc          | 2 +-
 .../mlir/tensorflow/analysis/resource_alias_analysis.cc        | 2 +-
 .../compiler/mlir/tensorflow/analysis/side_effect_analysis.cc  | 2 +-
 .../mlir/tensorflow/c/c_api_unified_experimental_mlir.cc       | 2 +-
 tensorflow/compiler/mlir/tensorflow/translate/import_model.cc  | 2 +-
 .../compiler/mlir/tensorflow/translate/tf_mlir_translate.cc    | 2 +-
 tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.h                    | 3 +--
 tensorflow/compiler/mlir/tfjs/transforms/optimize.cc           | 2 +-
 tensorflow/compiler/mlir/tfr/integration/tfr_decompose_ctx.cc  | 2 +-
 .../compiler/mlir/tfr/integration/tfr_decompose_ctx_test.cc    | 2 +-
 tensorflow/compiler/mlir/tfr/passes/decompose.cc               | 2 +-
 tensorflow/compiler/mlir/tfr/passes/raise_to_tf.cc             | 2 +-
 .../compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h       | 2 +-
 tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.cc            | 2 +-
 20 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/tensorflow/compiler/mlir/hlo/lib/utils/broadcast_utils.cc b/tensorflow/compiler/mlir/hlo/lib/utils/broadcast_utils.cc
index 71b1a4e164f..bdd66a18296 100644
--- a/tensorflow/compiler/mlir/hlo/lib/utils/broadcast_utils.cc
+++ b/tensorflow/compiler/mlir/hlo/lib/utils/broadcast_utils.cc
@@ -21,8 +21,8 @@ limitations under the License.
 #include "llvm/ADT/SmallVector.h"
 #include "mlir/Dialect/Shape/IR/Shape.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Diagnostics.h"
-#include "mlir/IR/StandardTypes.h"
 
 namespace mlir {
 namespace hlo {
diff --git a/tensorflow/compiler/mlir/hlo/lib/utils/convert_op_folder.cc b/tensorflow/compiler/mlir/hlo/lib/utils/convert_op_folder.cc
index a29f0a628c4..f7177ecc473 100644
--- a/tensorflow/compiler/mlir/hlo/lib/utils/convert_op_folder.cc
+++ b/tensorflow/compiler/mlir/hlo/lib/utils/convert_op_folder.cc
@@ -18,7 +18,7 @@ limitations under the License.
 #include "mlir-hlo/utils/convert_op_folder.h"
 
 #include "mlir/IR/Attributes.h"
-#include "mlir/IR/StandardTypes.h"
+#include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/TypeUtilities.h"
 
 namespace mlir {
diff --git a/tensorflow/compiler/mlir/lite/experimental/estimators/arithmetic_count_util.h b/tensorflow/compiler/mlir/lite/experimental/estimators/arithmetic_count_util.h
index 782714f5955..59bb7ae4e0a 100644
--- a/tensorflow/compiler/mlir/lite/experimental/estimators/arithmetic_count_util.h
+++ b/tensorflow/compiler/mlir/lite/experimental/estimators/arithmetic_count_util.h
@@ -15,8 +15,8 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_MLIR_LITE_EXPERIMENTAL_ESTIMATORS_ARITHMETIC_COUNT_UTIL_H_
 #define TENSORFLOW_COMPILER_MLIR_LITE_EXPERIMENTAL_ESTIMATORS_ARITHMETIC_COUNT_UTIL_H_
 
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 
 // For add/mul/div/sub and other broadcastable ops.
 class ArithmeticCountUtilHelper {
diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc
index 0230b7cf9cd..377194188b0 100644
--- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc
+++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.cc
@@ -31,11 +31,11 @@ limitations under the License.
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/Matchers.h"  // from @llvm-project
 #include "mlir/IR/OpImplementation.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/Support/LLVM.h"  // from @llvm-project
 #include "mlir/Support/LogicalResult.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/lite/ir/tfl_ops.h b/tensorflow/compiler/mlir/lite/ir/tfl_ops.h
index 589f18d789d..74fb98aa6fc 100644
--- a/tensorflow/compiler/mlir/lite/ir/tfl_ops.h
+++ b/tensorflow/compiler/mlir/lite/ir/tfl_ops.h
@@ -22,9 +22,9 @@ limitations under the License.
 #include "mlir/Dialect/Traits.h"  // from @llvm-project
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Dialect.h"  // from @llvm-project
 #include "mlir/IR/OpImplementation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/Interfaces/DerivedAttributeOpInterface.h"  // from @llvm-project
 #include "mlir/Interfaces/InferTypeOpInterface.h"  // from @llvm-project
 #include "mlir/Interfaces/LoopLikeInterface.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/lite/python/saved_model_to_tfl_flatbuffer.cc b/tensorflow/compiler/mlir/lite/python/saved_model_to_tfl_flatbuffer.cc
index 1fa3f04f558..52905f385b1 100644
--- a/tensorflow/compiler/mlir/lite/python/saved_model_to_tfl_flatbuffer.cc
+++ b/tensorflow/compiler/mlir/lite/python/saved_model_to_tfl_flatbuffer.cc
@@ -21,8 +21,8 @@ limitations under the License.
 #include "llvm/ADT/StringSet.h"
 #include "llvm/Support/ToolOutputFile.h"
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
 #include "mlir/Support/FileUtilities.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/python/mlir_wrapper/types.cc b/tensorflow/compiler/mlir/python/mlir_wrapper/types.cc
index be2dc2065f3..8ea311275d7 100644
--- a/tensorflow/compiler/mlir/python/mlir_wrapper/types.cc
+++ b/tensorflow/compiler/mlir/python/mlir_wrapper/types.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/python/mlir_wrapper/mlir_wrapper.h"
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h"
 
diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.cc b/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.cc
index 4f0a2581c99..7b0f402415c 100644
--- a/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.cc
+++ b/tensorflow/compiler/mlir/tensorflow/analysis/resource_alias_analysis.cc
@@ -30,8 +30,8 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Block.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
 #include "mlir/IR/Visitors.h"  // from @llvm-project
 #include "mlir/Interfaces/CallInterfaces.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc
index 71ab5285b64..de7861c19b1 100644
--- a/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc
+++ b/tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.cc
@@ -31,10 +31,10 @@ limitations under the License.
 #include "mlir/IR/Block.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
 #include "mlir/IR/OperationSupport.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
 #include "mlir/Interfaces/SideEffectInterfaces.h"  // from @llvm-project
 #include "mlir/Support/LLVM.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/c/c_api_unified_experimental_mlir.cc b/tensorflow/compiler/mlir/tensorflow/c/c_api_unified_experimental_mlir.cc
index 82c23d2f42e..84d9e648d04 100644
--- a/tensorflow/compiler/mlir/tensorflow/c/c_api_unified_experimental_mlir.cc
+++ b/tensorflow/compiler/mlir/tensorflow/c/c_api_unified_experimental_mlir.cc
@@ -23,11 +23,11 @@ limitations under the License.
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
 #include "mlir/IR/OperationSupport.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/Pass/PassManager.h"  // from @llvm-project
 #include "mlir/Support/LLVM.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc
index 53c7355a3b0..3099554f5c7 100644
--- a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc
+++ b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc
@@ -50,12 +50,12 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Diagnostics.h"  // from @llvm-project
 #include "mlir/IR/Identifier.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/OpDefinition.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/IR/Verifier.h"  // from @llvm-project
 #include "mlir/Pass/PassManager.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.cc b/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.cc
index dd19327102d..496bf83cdc9 100644
--- a/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.cc
+++ b/tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.cc
@@ -19,10 +19,10 @@ limitations under the License.
 #include "llvm/Support/raw_ostream.h"
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Identifier.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/Parser.h"  // from @llvm-project
 #include "tensorflow/cc/saved_model/bundle_v2.h"
 #include "tensorflow/cc/saved_model/reader.h"
diff --git a/tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.h b/tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.h
index bc52e3a0c7a..4e2c5f6a678 100644
--- a/tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.h
+++ b/tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.h
@@ -23,12 +23,11 @@ limitations under the License.
 #define TENSORFLOW_COMPILER_MLIR_TFJS_IR_TFJS_OPS_H_
 
 #include "mlir/Dialect/Traits.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Dialect.h"  // from @llvm-project
 #include "mlir/IR/OpImplementation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/Interfaces/SideEffectInterfaces.h"  // from @llvm-project
 #include "mlir/Support/LLVM.h"  // from @llvm-project
-
 #include "tensorflow/compiler/mlir/tfjs/ir/tfjs_dialect.h.inc"
 
 #define GET_OP_CLASSES
diff --git a/tensorflow/compiler/mlir/tfjs/transforms/optimize.cc b/tensorflow/compiler/mlir/tfjs/transforms/optimize.cc
index 04811ff8ede..353e9618db1 100644
--- a/tensorflow/compiler/mlir/tfjs/transforms/optimize.cc
+++ b/tensorflow/compiler/mlir/tfjs/transforms/optimize.cc
@@ -20,9 +20,9 @@ limitations under the License.
 
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/Attributes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Matchers.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
 #include "mlir/Support/LLVM.h"  // from @llvm-project
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tfr/integration/tfr_decompose_ctx.cc b/tensorflow/compiler/mlir/tfr/integration/tfr_decompose_ctx.cc
index a3b0193449a..36cb8c06196 100644
--- a/tensorflow/compiler/mlir/tfr/integration/tfr_decompose_ctx.cc
+++ b/tensorflow/compiler/mlir/tfr/integration/tfr_decompose_ctx.cc
@@ -28,10 +28,10 @@ limitations under the License.
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Identifier.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/IR/Verifier.h"  // from @llvm-project
 #include "mlir/Parser.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tfr/integration/tfr_decompose_ctx_test.cc b/tensorflow/compiler/mlir/tfr/integration/tfr_decompose_ctx_test.cc
index 1bd8eefbdd4..d451bea8147 100644
--- a/tensorflow/compiler/mlir/tfr/integration/tfr_decompose_ctx_test.cc
+++ b/tensorflow/compiler/mlir/tfr/integration/tfr_decompose_ctx_test.cc
@@ -19,9 +19,9 @@ limitations under the License.
 
 #include "absl/types/span.h"
 #include "mlir/IR/Builders.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Dialect.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "tensorflow/compiler/xla/test.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
diff --git a/tensorflow/compiler/mlir/tfr/passes/decompose.cc b/tensorflow/compiler/mlir/tfr/passes/decompose.cc
index d910e97231f..13d5f45e0ab 100644
--- a/tensorflow/compiler/mlir/tfr/passes/decompose.cc
+++ b/tensorflow/compiler/mlir/tfr/passes/decompose.cc
@@ -35,8 +35,8 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/SymbolTable.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
 #include "mlir/IR/Visitors.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tfr/passes/raise_to_tf.cc b/tensorflow/compiler/mlir/tfr/passes/raise_to_tf.cc
index 9c8f68086b0..7ffcd4c7b11 100644
--- a/tensorflow/compiler/mlir/tfr/passes/raise_to_tf.cc
+++ b/tensorflow/compiler/mlir/tfr/passes/raise_to_tf.cc
@@ -35,11 +35,11 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/Matchers.h"  // from @llvm-project
 #include "mlir/IR/OperationSupport.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/SymbolTable.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h b/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h
index 30cefaa119c..c8f8439decc 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h
@@ -19,11 +19,11 @@ limitations under the License.
 #define TENSORFLOW_COMPILER_MLIR_TOOLS_KERNEL_GEN_IR_TF_FRAMEWORK_OPS_H_
 
 #include "mlir/IR/Attributes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Dialect.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/OpDefinition.h"  // from @llvm-project
 #include "mlir/IR/OpImplementation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/Interfaces/SideEffectInterfaces.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_status.h.inc"
 #include "tensorflow/core/protobuf/error_codes.pb.h"
diff --git a/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.cc b/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.cc
index d6fd6600fa9..2f1320a548f 100644
--- a/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.cc
+++ b/tensorflow/compiler/mlir/xla/ir/mlir_hlo_builder.cc
@@ -17,7 +17,7 @@ limitations under the License.
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/Support/raw_ostream.h"
 #include "mlir/IR/Builders.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
 #include "tensorflow/compiler/mlir/xla/attribute_importer.h"
 #include "tensorflow/compiler/mlir/xla/hlo_function_importer.h"

From 4b4893e25b954eecef0cac1cebf4d3d21ba0679f Mon Sep 17 00:00:00 2001
From: Frank Chen <frankchn@google.com>
Date: Fri, 11 Dec 2020 18:20:04 -0800
Subject: [PATCH 15/60] Fix API initializer bug: request TPU library initialize
 itself during loading

PiperOrigin-RevId: 347112603
Change-Id: I3951680b9c7873e507363dd40da49da9e9764b4c
---
 tensorflow/core/tpu/tpu_api_dlsym_initializer.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/tpu/tpu_api_dlsym_initializer.cc b/tensorflow/core/tpu/tpu_api_dlsym_initializer.cc
index 6a9ef76c6ee..4d3f8131ab4 100644
--- a/tensorflow/core/tpu/tpu_api_dlsym_initializer.cc
+++ b/tensorflow/core/tpu/tpu_api_dlsym_initializer.cc
@@ -52,7 +52,7 @@ Status InitializeTpuLibrary(void* library_handle) {
     void (*initialize_fn)(bool init_library, int argc, char** argv);
     initialize_fn = reinterpret_cast<decltype(initialize_fn)>(
         dlsym(library_handle, "TfTpu_Initialize"));
-    (*initialize_fn)(/*init_library=*/false, /*argc=*/0, /*argv=*/nullptr);
+    (*initialize_fn)(/*init_library=*/true, /*argc=*/0, /*argv=*/nullptr);
 
     RegisterTpuPlatform();
     RegisterTpuSystemDevice();

From 14a0a90f9b1c806d4e99ef8aa480a043340fba39 Mon Sep 17 00:00:00 2001
From: Stella Laurenzo <laurenzo@google.com>
Date: Fri, 11 Dec 2020 18:29:41 -0800
Subject: [PATCH 16/60] NFC: Simplify Tosa build and layering.

* Eliminate some intermediate source files, collapsing into two user facing libraries.
* Fix layering of dependencies so user libraries are self contained.
* Take a pass through and eliminate redundant includes.
* Separate TF and TFL passes as the latter can be built with far fewer dependencies (and they will diverge with respect to some of the things that were common).

PiperOrigin-RevId: 347113596
Change-Id: Ie5d0016750d7be49cd52ee06d349d798b177c217
---
 tensorflow/compiler/mlir/BUILD                |   4 +-
 tensorflow/compiler/mlir/tosa/BUILD           | 292 +++++-------------
 tensorflow/compiler/mlir/tosa/tf_passes.cc    |  64 ++++
 .../tosa/{tosa_passpipes.h => tf_passes.h}    |  20 +-
 .../tosa/{tosa_passpipes.cc => tfl_passes.cc} |  56 ++--
 .../{tf_tosa_pipeline.cc => tfl_passes.h}     |  20 +-
 .../compiler/mlir/tosa/tfl_tosa_pipeline.cc   |  29 --
 .../mlir/tosa/transforms/convert_tfl_uint8.cc |  27 +-
 .../mlir/tosa/transforms/fuse_bias_tf.cc      |  21 +-
 .../mlir/tosa/transforms/legalize_common.cc   |  10 +-
 .../mlir/tosa/transforms/legalize_common.h    |  32 +-
 .../mlir/tosa/transforms/legalize_tf.cc       |  27 +-
 .../mlir/tosa/transforms/legalize_tfl.cc      |  30 +-
 .../mlir/tosa/transforms/legalize_utils.cc    |   3 +-
 .../mlir/tosa/transforms/legalize_utils.h     |  22 +-
 .../compiler/mlir/tosa/transforms/passes.h    |   7 +-
 .../mlir/tosa/transforms/register_passes.h    |  34 --
 17 files changed, 222 insertions(+), 476 deletions(-)
 create mode 100644 tensorflow/compiler/mlir/tosa/tf_passes.cc
 rename tensorflow/compiler/mlir/tosa/{tosa_passpipes.h => tf_passes.h} (65%)
 rename tensorflow/compiler/mlir/tosa/{tosa_passpipes.cc => tfl_passes.cc} (56%)
 rename tensorflow/compiler/mlir/tosa/{tf_tosa_pipeline.cc => tfl_passes.h} (57%)
 delete mode 100644 tensorflow/compiler/mlir/tosa/tfl_tosa_pipeline.cc
 delete mode 100644 tensorflow/compiler/mlir/tosa/transforms/register_passes.h

diff --git a/tensorflow/compiler/mlir/BUILD b/tensorflow/compiler/mlir/BUILD
index f839acd32a2..405471ab1e4 100644
--- a/tensorflow/compiler/mlir/BUILD
+++ b/tensorflow/compiler/mlir/BUILD
@@ -112,8 +112,8 @@ cc_library(
         "//tensorflow/compiler/mlir/tensorflow:tf_dialect_passes",
         "//tensorflow/compiler/mlir/tensorflow:tf_legalize_hlo",
         "//tensorflow/compiler/mlir/tfjs:tensorflow_js_passes",
-        "//tensorflow/compiler/mlir/tosa:tf_tosa_passes",
-        "//tensorflow/compiler/mlir/tosa:tfl_tosa_passes",
+        "//tensorflow/compiler/mlir/tosa:tf_passes",
+        "//tensorflow/compiler/mlir/tosa:tfl_passes",
     ],
 )
 
diff --git a/tensorflow/compiler/mlir/tosa/BUILD b/tensorflow/compiler/mlir/tosa/BUILD
index cc423b81b6f..9032a409cc0 100644
--- a/tensorflow/compiler/mlir/tosa/BUILD
+++ b/tensorflow/compiler/mlir/tosa/BUILD
@@ -8,7 +8,7 @@ load("//third_party/mlir:tblgen.bzl", "gentbl")
 
 # TODO: Tighten visibility once targets are at the right granularity.
 package(
-    default_visibility = [":friends"],
+    default_visibility = [":internal"],
     licenses = ["notice"],  # Apache 2.0
 )
 
@@ -33,17 +33,13 @@ package_group(
 filegroup(
     name = "tosa_ops_td_files",
     srcs = [
-        "@llvm-project//mlir:TdFiles",
+        "@llvm-project//mlir:TosaDialectTdFiles",
     ],
-    # TODO: Switch to pruned list of TD files once build file changes land.
-    # srcs = [
-    #     "@llvm-project//mlir:TosaDialectTdFiles",
-    # ],
     compatible_with = get_compatible_with_cloud(),
 )
 
 gentbl(
-    name = "tosa_pass_inc_gen",
+    name = "tosa_passes_inc_gen",
     compatible_with = get_compatible_with_cloud(),
     tbl_outs = [
         (
@@ -58,6 +54,40 @@ gentbl(
     ],
 )
 
+cc_library(
+    name = "passes_header",
+    hdrs = [
+        "transforms/passes.h",
+        "transforms/passes.h.inc",
+    ],
+    compatible_with = get_compatible_with_cloud(),
+    deps = ["@llvm-project//mlir:Pass"],
+)
+
+cc_library(
+    name = "legalize_common",
+    srcs = [
+        "transforms/legalize_common.cc",
+        "transforms/legalize_utils.cc",
+    ],
+    hdrs = [
+        "transforms/legalize_common.h",
+        "transforms/legalize_utils.h",
+    ],
+    compatible_with = get_compatible_with_cloud(),
+    deps = [
+        "//tensorflow/compiler/mlir/lite:tensorflow_lite",
+        "//tensorflow/core:framework",
+        "//tensorflow/core/kernels:conv_grad_shape_utils",
+        "@llvm-project//llvm:Support",
+        "@llvm-project//mlir:IR",
+        "@llvm-project//mlir:QuantOps",
+        "@llvm-project//mlir:Support",
+        "@llvm-project//mlir:TosaDialect",
+    ],
+    alwayslink = 1,
+)
+
 gentbl(
     name = "tosa_legalize_tf_inc_gen",
     compatible_with = get_compatible_with_cloud(),
@@ -76,6 +106,36 @@ gentbl(
     ],
 )
 
+cc_library(
+    name = "tf_passes",
+    srcs = [
+        "tf_passes.cc",
+        "transforms/fuse_bias_tf.cc",
+        "transforms/legalize_tf.cc",
+        "transforms/tf_legalize_patterns.inc",
+    ],
+    hdrs = [
+        "tf_passes.h",
+        "transforms/passes.h",
+    ],
+    compatible_with = get_compatible_with_cloud(),
+    visibility = [":friends"],
+    deps = [
+        ":legalize_common",
+        ":passes_header",
+        "//tensorflow/compiler/mlir/tensorflow",
+        "@llvm-project//llvm:Support",
+        "@llvm-project//mlir:IR",
+        "@llvm-project//mlir:Pass",
+        "@llvm-project//mlir:QuantOps",
+        "@llvm-project//mlir:StandardOps",
+        "@llvm-project//mlir:Support",
+        "@llvm-project//mlir:TosaDialect",
+        "@llvm-project//mlir:Transforms",
+    ],
+    alwayslink = 1,
+)
+
 gentbl(
     name = "tosa_legalize_tfl_inc_gen",
     compatible_with = get_compatible_with_cloud(),
@@ -95,233 +155,31 @@ gentbl(
 )
 
 cc_library(
-    name = "tosa_legalize_tf",
-    srcs = [
-        "transforms/legalize_tf.cc",
-        "transforms/tf_legalize_patterns.inc",
-    ],
-    hdrs = [
-        "transforms/legalize_common.h",
-        "transforms/legalize_utils.h",
-        "transforms/passes.h",
-        "@llvm-project//mlir:include/mlir/Transforms/InliningUtils.h",
-    ],
-    compatible_with = get_compatible_with_cloud(),
-    deps = [
-        ":tosa_legalize_tf_inc_gen",
-        ":tosa_pass_inc_gen",
-        "//tensorflow/compiler/mlir/tensorflow",
-        "//tensorflow/compiler/mlir/tensorflow:convert_graphdef",
-        "//tensorflow/compiler/mlir/tensorflow:tensorflow_all_ops_inc_gen",
-        "//tensorflow/compiler/mlir/tensorflow:translate_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core/kernels:conv_grad_shape_utils",
-        "@com_google_absl//absl/container:flat_hash_map",
-        "@com_google_absl//absl/memory",
-        "@flatbuffers",
-        "@llvm-project//llvm:Support",
-        "@llvm-project//mlir:Analysis",
-        "@llvm-project//mlir:Dialect",
-        "@llvm-project//mlir:IR",
-        "@llvm-project//mlir:Parser",
-        "@llvm-project//mlir:Pass",
-        "@llvm-project//mlir:QuantOps",
-        "@llvm-project//mlir:StandardOps",
-        "@llvm-project//mlir:Support",
-        "@llvm-project//mlir:TosaDialect",
-        "@llvm-project//mlir:TransformUtils",
-    ],
-    alwayslink = 1,
-)
-
-cc_library(
-    name = "tosa_legalize_tfl",
+    name = "tfl_passes",
     srcs = [
+        "tfl_passes.cc",
+        "transforms/convert_tfl_uint8.cc",
         "transforms/legalize_tfl.cc",
         "transforms/tfl_legalize_patterns.inc",
     ],
     hdrs = [
-        "transforms/legalize_common.h",
-        "transforms/legalize_utils.h",
+        "tfl_passes.h",
         "transforms/passes.h",
-        "//tensorflow/compiler/mlir/lite/quantization:quantization_traits.h",
-        "@llvm-project//mlir:include/mlir/Transforms/InliningUtils.h",
     ],
     compatible_with = get_compatible_with_cloud(),
+    visibility = [":friends"],
     deps = [
-        ":tosa_legalize_tfl_inc_gen",
-        ":tosa_pass_inc_gen",
+        ":legalize_common",
+        ":passes_header",
         "//tensorflow/compiler/mlir/lite:tensorflow_lite",
-        "//tensorflow/compiler/mlir/lite:tensorflow_lite_ops_inc_gen",
-        "//tensorflow/compiler/mlir/lite:validators",
-        "//tensorflow/compiler/mlir/lite/quantization:quantization_lib",
-        "//tensorflow/compiler/mlir/tensorflow:convert_graphdef",
-        "//tensorflow/compiler/mlir/tensorflow:translate_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core/kernels:conv_grad_shape_utils",
-        "//tensorflow/lite/schema:schema_fbs",
-        "@com_google_absl//absl/container:flat_hash_map",
-        "@com_google_absl//absl/memory",
-        "@flatbuffers",
         "@llvm-project//llvm:Support",
-        "@llvm-project//mlir:Analysis",
-        "@llvm-project//mlir:Dialect",
-        "@llvm-project//mlir:IR",
-        "@llvm-project//mlir:Parser",
-        "@llvm-project//mlir:Pass",
-        "@llvm-project//mlir:QuantOps",
-        "@llvm-project//mlir:StandardOps",
-        "@llvm-project//mlir:Support",
-        "@llvm-project//mlir:TosaDialect",
-        "@llvm-project//mlir:TransformUtils",
-    ],
-    alwayslink = 1,
-)
-
-cc_library(
-    name = "tosa_legalize_common",
-    srcs = [
-        "transforms/legalize_common.cc",
-        "transforms/legalize_utils.cc",
-        "transforms/tf_legalize_patterns.inc",
-    ],
-    hdrs = [
-        "transforms/legalize_common.h",
-        "transforms/legalize_utils.h",
-        "@llvm-project//mlir:include/mlir/Transforms/InliningUtils.h",
-    ],
-    compatible_with = get_compatible_with_cloud(),
-    deps = [
-        "//tensorflow/compiler/mlir/lite:tensorflow_lite",
-        "//tensorflow/compiler/mlir/lite:tensorflow_lite_ops_inc_gen",
-        "//tensorflow/compiler/mlir/lite:validators",
-        "//tensorflow/compiler/mlir/tensorflow:convert_graphdef",
-        "//tensorflow/compiler/mlir/tensorflow:tensorflow_all_ops_inc_gen",
-        "//tensorflow/compiler/mlir/tensorflow:translate_lib",
-        "//tensorflow/core:framework",
-        "//tensorflow/core:lib",
-        "//tensorflow/core/kernels:conv_grad_shape_utils",
-        "@com_google_absl//absl/container:flat_hash_map",
-        "@com_google_absl//absl/memory",
-        "@flatbuffers",
-        "@llvm-project//llvm:Support",
-        "@llvm-project//mlir:Analysis",
-        "@llvm-project//mlir:Dialect",
-        "@llvm-project//mlir:IR",
-        "@llvm-project//mlir:Parser",
-        "@llvm-project//mlir:Pass",
-        "@llvm-project//mlir:QuantOps",
-        "@llvm-project//mlir:StandardOps",
-        "@llvm-project//mlir:Support",
-        "@llvm-project//mlir:TosaDialect",
-        "@llvm-project//mlir:TransformUtils",
-    ],
-    alwayslink = 1,
-)
-
-cc_library(
-    name = "tosa_fuse_bias_tf",
-    srcs = [
-        "transforms/fuse_bias_tf.cc",
-    ],
-    hdrs = [
-        "transforms/passes.h",
-        "@llvm-project//mlir:include/mlir/Transforms/InliningUtils.h",
-    ],
-    compatible_with = get_compatible_with_cloud(),
-    deps = [
-        ":tosa_legalize_common",
-        ":tosa_pass_inc_gen",
-        "//tensorflow/compiler/mlir/tensorflow",
-        "@llvm-project//mlir:IR",
-        "@llvm-project//mlir:Pass",
-        "@llvm-project//mlir:StandardOps",
-        "@llvm-project//mlir:Support",
-        "@llvm-project//mlir:TosaDialect",
-        "@llvm-project//mlir:TransformUtils",
-    ],
-    alwayslink = 1,
-)
-
-cc_library(
-    name = "tosa_convert_tfl_uint8",
-    srcs = [
-        "transforms/convert_tfl_uint8.cc",
-    ],
-    hdrs = [
-        "transforms/passes.h",
-        "@llvm-project//mlir:include/mlir/Transforms/InliningUtils.h",
-    ],
-    compatible_with = get_compatible_with_cloud(),
-    deps = [
-        ":tosa_legalize_common",
-        ":tosa_pass_inc_gen",
-        "//tensorflow/compiler/mlir/lite:tensorflow_lite",
         "@llvm-project//mlir:IR",
         "@llvm-project//mlir:Pass",
         "@llvm-project//mlir:QuantOps",
         "@llvm-project//mlir:StandardOps",
         "@llvm-project//mlir:Support",
         "@llvm-project//mlir:TosaDialect",
-        "@llvm-project//mlir:TransformUtils",
-    ],
-    alwayslink = 1,
-)
-
-cc_library(
-    name = "tosa_pipelines",
-    srcs = [
-        "tosa_passpipes.cc",
-    ],
-    hdrs = [
-        "tosa_passpipes.h",
-        "transforms/passes.h",
-        "transforms/register_passes.h",
-    ],
-    compatible_with = get_compatible_with_cloud(),
-    deps = [
-        ":tosa_pass_inc_gen",
-        "@llvm-project//llvm:Support",
-        "@llvm-project//mlir:IR",
-        "@llvm-project//mlir:Pass",
-        "@llvm-project//mlir:TosaDialect",
-        "@llvm-project//mlir:TransformUtils",
-    ],
-    alwayslink = 1,
-)
-
-cc_library(
-    name = "tf_tosa_passes",
-    srcs = [
-        "tf_tosa_pipeline.cc",
-    ],
-    hdrs = [
-    ],
-    compatible_with = get_compatible_with_cloud(),
-    deps = [
-        ":tosa_fuse_bias_tf",
-        ":tosa_legalize_common",
-        ":tosa_legalize_tf",
-        ":tosa_pipelines",
-    ],
-    alwayslink = 1,
-)
-
-cc_library(
-    name = "tfl_tosa_passes",
-    srcs = [
-        "tfl_tosa_pipeline.cc",
-    ],
-    hdrs = [
-    ],
-    compatible_with = get_compatible_with_cloud(),
-    deps = [
-        ":tosa_convert_tfl_uint8",
-        ":tosa_legalize_common",
-        ":tosa_legalize_tfl",
-        ":tosa_pipelines",
+        "@llvm-project//mlir:Transforms",
     ],
     alwayslink = 1,
 )
diff --git a/tensorflow/compiler/mlir/tosa/tf_passes.cc b/tensorflow/compiler/mlir/tosa/tf_passes.cc
new file mode 100644
index 00000000000..fadf7e54580
--- /dev/null
+++ b/tensorflow/compiler/mlir/tosa/tf_passes.cc
@@ -0,0 +1,64 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/mlir/tosa/tf_passes.h"
+
+#include "mlir/Dialect/Tosa/Transforms/Passes.h"  // from @llvm-project
+#include "mlir/Pass/PassRegistry.h"  // from @llvm-project
+#include "mlir/Transforms/Passes.h"  // from @llvm-project
+#include "tensorflow/compiler/mlir/tosa/transforms/passes.h"
+
+namespace mlir {
+namespace tosa {
+
+void createTFtoTOSALegalizationPipeline(
+    OpPassManager& pm, const TOSATFLegalizationPipelineOptions& opts) {
+  //----------------------------------------------------------------------------
+  // Prepare TF module for conversion
+  //----------------------------------------------------------------------------
+  // Inline all functions into main and then delete the functions themselves.
+  pm.addPass(mlir::createInlinerPass());
+
+  // Now that there is only one function, run some MLIR passes on it.
+  pm.addPass(mlir::createCanonicalizerPass());
+  pm.addPass(mlir::createCSEPass());
+
+  pm.addPass(mlir::createLoopFusionPass());
+  pm.addPass(mlir::createMemRefDataFlowOptPass());
+
+  //----------------------------------------------------------------------------
+  // Perform main conversion.
+  // Run the TF-specific bias fusion and legalization passes.
+  //----------------------------------------------------------------------------
+  pm.addPass(mlir::tosa::createFuseBiasTFPass());
+  pm.addPass(mlir::tosa::createLegalizeTFPass());
+
+  //----------------------------------------------------------------------------
+  // Post conversion cleanup.
+  //----------------------------------------------------------------------------
+  pm.addPass(mlir::tosa::createTosaMakeBroadcastablePass());
+  // Inline the call/return basic blocks within TOSA control flow ops.
+  pm.addPass(mlir::createInlinerPass());
+  // Clean up with DCE.
+  pm.addPass(mlir::createSymbolDCEPass());
+}
+
+static mlir::PassPipelineRegistration<TOSATFLegalizationPipelineOptions>
+    tf_tosa_pipeline("tf-to-tosa-pipeline",
+                     "TensorFlow to TOSA legalization pipeline",
+                     createTFtoTOSALegalizationPipeline);
+
+}  // namespace tosa
+}  // namespace mlir
diff --git a/tensorflow/compiler/mlir/tosa/tosa_passpipes.h b/tensorflow/compiler/mlir/tosa/tf_passes.h
similarity index 65%
rename from tensorflow/compiler/mlir/tosa/tosa_passpipes.h
rename to tensorflow/compiler/mlir/tosa/tf_passes.h
index eee7e634a12..18d11cde4d3 100644
--- a/tensorflow/compiler/mlir/tosa/tosa_passpipes.h
+++ b/tensorflow/compiler/mlir/tosa/tf_passes.h
@@ -16,28 +16,20 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_MLIR_TOSA_TOSA_PASSES_H
 #define TENSORFLOW_COMPILER_MLIR_TOSA_TOSA_PASSES_H
 
-#include "mlir/Dialect/Tosa/Transforms/Passes.h"
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/Pass/PassManager.h"
-#include "llvm/ADT/Optional.h"
-#include "tensorflow/compiler/mlir/tosa/transforms/passes.h"
+#include "mlir/Pass/PassManager.h"  // from @llvm-project
+#include "mlir/Pass/PassOptions.h"  // from @llvm-project
 
 namespace mlir {
-
 namespace tosa {
 
-void addPreOptMlirPasses(mlir::OpPassManager& pm);
-
-void addPostOptMlirPasses(mlir::OpPassManager& pm);
+struct TOSATFLegalizationPipelineOptions
+    : public PassPipelineOptions<TOSATFLegalizationPipelineOptions> {};
 
+// Legalizes TF dialect(s) to Tosa.
 void createTFtoTOSALegalizationPipeline(
-    OpPassManager& pm, const TOSALegalizationPipelineOptions& opts);
-
-void createTFLtoTOSALegalizationPipeline(
-    OpPassManager& pm, const TOSALegalizationPipelineOptions& opts);
+    OpPassManager& pm, const TOSATFLegalizationPipelineOptions& opts);
 
 }  // namespace tosa
-
 }  // namespace mlir
 
 #endif  // TENSORFLOW_COMPILER_MLIR_TOSA_TOSA_PASSES_H
diff --git a/tensorflow/compiler/mlir/tosa/tosa_passpipes.cc b/tensorflow/compiler/mlir/tosa/tfl_passes.cc
similarity index 56%
rename from tensorflow/compiler/mlir/tosa/tosa_passpipes.cc
rename to tensorflow/compiler/mlir/tosa/tfl_passes.cc
index 1bad41522f3..25d9041a508 100644
--- a/tensorflow/compiler/mlir/tosa/tosa_passpipes.cc
+++ b/tensorflow/compiler/mlir/tosa/tfl_passes.cc
@@ -13,23 +13,20 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/mlir/tosa/tosa_passpipes.h"
+#include "tensorflow/compiler/mlir/tosa/tfl_passes.h"
 
-#include "mlir/IR/Attributes.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Pass/PassManager.h"
-#include "mlir/Pass/PassRegistry.h"
-#include "mlir/Transforms/Passes.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/STLExtras.h"
+#include "mlir/Dialect/Tosa/Transforms/Passes.h"  // from @llvm-project
+#include "mlir/Transforms/Passes.h"  // from @llvm-project
+#include "tensorflow/compiler/mlir/tosa/transforms/passes.h"
 
 namespace mlir {
-
 namespace tosa {
 
-void addPreOptMlirPasses(mlir::OpPassManager& pm) {
+void createTFLtoTOSALegalizationPipeline(
+    OpPassManager& pm, const TOSATFLLegalizationPipelineOptions& opts) {
+  //----------------------------------------------------------------------------
+  // Prepare TFL module for conversion
+  //----------------------------------------------------------------------------
   // Inline all functions into main and then delete the functions themselves.
   pm.addPass(mlir::createInlinerPass());
 
@@ -39,9 +36,16 @@ void addPreOptMlirPasses(mlir::OpPassManager& pm) {
 
   pm.addPass(mlir::createLoopFusionPass());
   pm.addPass(mlir::createMemRefDataFlowOptPass());
-}
 
-void addPostOptMlirPasses(mlir::OpPassManager& pm) {
+  //----------------------------------------------------------------------------
+  // Perform main conversion.
+  //----------------------------------------------------------------------------
+  pm.addPass(mlir::tosa::createConvertTFLUint8Pass());
+  pm.addPass(mlir::tosa::createLegalizeTFLPass());
+
+  //----------------------------------------------------------------------------
+  // Post conversion cleanup.
+  //----------------------------------------------------------------------------
   pm.addPass(mlir::tosa::createTosaMakeBroadcastablePass());
   // Inline the call/return basic blocks within TOSA control flow ops.
   pm.addPass(mlir::createInlinerPass());
@@ -49,26 +53,10 @@ void addPostOptMlirPasses(mlir::OpPassManager& pm) {
   pm.addPass(mlir::createSymbolDCEPass());
 }
 
-void createTFtoTOSALegalizationPipeline(
-    OpPassManager& pm, const TOSALegalizationPipelineOptions& opts) {
-  addPreOptMlirPasses(pm);
-
-  pm.addPass(mlir::tosa::createFuseBiasTFPass());
-  pm.addPass(mlir::tosa::createLegalizeTFPass());
-
-  addPostOptMlirPasses(pm);
-}
-
-void createTFLtoTOSALegalizationPipeline(
-    OpPassManager& pm, const TOSALegalizationPipelineOptions& opts) {
-  addPreOptMlirPasses(pm);
-
-  pm.addPass(mlir::tosa::createConvertTFLUint8Pass());
-  pm.addPass(mlir::tosa::createLegalizeTFLPass());
-
-  addPostOptMlirPasses(pm);
-}
+static mlir::PassPipelineRegistration<TOSATFLLegalizationPipelineOptions>
+    tfl_tosa_pipeline("tfl-to-tosa-pipeline",
+                      "TensorFlow Lite to TOSA legalization pipeline",
+                      createTFLtoTOSALegalizationPipeline);
 
 }  // namespace tosa
-
 }  // namespace mlir
diff --git a/tensorflow/compiler/mlir/tosa/tf_tosa_pipeline.cc b/tensorflow/compiler/mlir/tosa/tfl_passes.h
similarity index 57%
rename from tensorflow/compiler/mlir/tosa/tf_tosa_pipeline.cc
rename to tensorflow/compiler/mlir/tosa/tfl_passes.h
index e8d1aa73478..255418ae443 100644
--- a/tensorflow/compiler/mlir/tosa/tf_tosa_pipeline.cc
+++ b/tensorflow/compiler/mlir/tosa/tfl_passes.h
@@ -13,17 +13,23 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include "tensorflow/compiler/mlir/tosa/tosa_passpipes.h"
+#ifndef TENSORFLOW_COMPILER_MLIR_TOSA_TFL_PASSES_H_
+#define TENSORFLOW_COMPILER_MLIR_TOSA_TFL_PASSES_H_
+
+#include "mlir/Pass/PassManager.h"  // from @llvm-project
+#include "mlir/Pass/PassOptions.h"  // from @llvm-project
 
 namespace mlir {
-
 namespace tosa {
 
-static mlir::PassPipelineRegistration<TOSALegalizationPipelineOptions>
-    tf_tosa_pipeline("tf-to-tosa-pipeline",
-                     "TensorFlow to TOSA legalization pipeline",
-                     createTFtoTOSALegalizationPipeline);
+struct TOSATFLLegalizationPipelineOptions
+    : public PassPipelineOptions<TOSATFLLegalizationPipelineOptions> {};
+
+// Legalizes TFL (TensorFlow Lite) dialect(s) to Tosa.
+void createTFLtoTOSALegalizationPipeline(
+    OpPassManager& pm, const TOSATFLLegalizationPipelineOptions& opts);
 
 }  // namespace tosa
-
 }  // namespace mlir
+
+#endif  // TENSORFLOW_COMPILER_MLIR_TOSA_TFL_PASSES_H_
diff --git a/tensorflow/compiler/mlir/tosa/tfl_tosa_pipeline.cc b/tensorflow/compiler/mlir/tosa/tfl_tosa_pipeline.cc
deleted file mode 100644
index 8552a68101a..00000000000
--- a/tensorflow/compiler/mlir/tosa/tfl_tosa_pipeline.cc
+++ /dev/null
@@ -1,29 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/compiler/mlir/tosa/tosa_passpipes.h"
-
-namespace mlir {
-
-namespace tosa {
-
-static mlir::PassPipelineRegistration<TOSALegalizationPipelineOptions>
-    tfl_tosa_pipeline("tfl-to-tosa-pipeline",
-                      "TensorFlow Lite to TOSA legalization pipeline",
-                      createTFLtoTOSALegalizationPipeline);
-
-}  // namespace tosa
-
-}  // namespace mlir
diff --git a/tensorflow/compiler/mlir/tosa/transforms/convert_tfl_uint8.cc b/tensorflow/compiler/mlir/tosa/transforms/convert_tfl_uint8.cc
index 8a0e36dd941..08ee3c29ed4 100644
--- a/tensorflow/compiler/mlir/tosa/transforms/convert_tfl_uint8.cc
+++ b/tensorflow/compiler/mlir/tosa/transforms/convert_tfl_uint8.cc
@@ -29,25 +29,14 @@ limitations under the License.
 #include <iterator>
 #include <numeric>
 
-#include "mlir/Dialect/Quant/FakeQuantSupport.h"
-#include "mlir/Dialect/Quant/UniformSupport.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/Dialect/Tosa/IR/TosaOps.h"
-#include "mlir/Dialect/Tosa/Utils/QuantUtils.h"
-#include "mlir/IR/Attributes.h"
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/IR/Diagnostics.h"
-#include "mlir/IR/MLIRContext.h"
-#include "mlir/IR/Matchers.h"
-#include "mlir/IR/Operation.h"
-#include "mlir/IR/PatternMatch.h"
-#include "mlir/IR/StandardTypes.h"
-#include "mlir/IR/TypeUtilities.h"
-#include "mlir/IR/Types.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Support/LLVM.h"
-#include "mlir/Transforms/DialectConversion.h"
-#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "mlir/Dialect/Tosa/Utils/QuantUtils.h"  // from @llvm-project
+#include "mlir/IR/Builders.h"  // from @llvm-project
+#include "mlir/IR/BuiltinAttributes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
+#include "mlir/IR/PatternMatch.h"  // from @llvm-project
+#include "mlir/Pass/PassRegistry.h"  // from @llvm-project
+#include "mlir/Support/LogicalResult.h"  // from @llvm-project
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
 #include "tensorflow/compiler/mlir/tosa/transforms/legalize_common.h"
 #include "tensorflow/compiler/mlir/tosa/transforms/legalize_utils.h"
diff --git a/tensorflow/compiler/mlir/tosa/transforms/fuse_bias_tf.cc b/tensorflow/compiler/mlir/tosa/transforms/fuse_bias_tf.cc
index 74382ded178..058ba48e2c7 100644
--- a/tensorflow/compiler/mlir/tosa/transforms/fuse_bias_tf.cc
+++ b/tensorflow/compiler/mlir/tosa/transforms/fuse_bias_tf.cc
@@ -21,23 +21,10 @@ limitations under the License.
 #include <iterator>
 #include <numeric>
 
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/Dialect/Tosa/IR/TosaOps.h"
-#include "mlir/Dialect/Tosa/Utils/QuantUtils.h"
-#include "mlir/IR/Attributes.h"
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/IR/Diagnostics.h"
-#include "mlir/IR/MLIRContext.h"
-#include "mlir/IR/Matchers.h"
-#include "mlir/IR/Operation.h"
-#include "mlir/IR/PatternMatch.h"
-#include "mlir/IR/StandardTypes.h"
-#include "mlir/IR/TypeUtilities.h"
-#include "mlir/IR/Types.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Support/LLVM.h"
-#include "mlir/Transforms/DialectConversion.h"
-#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "mlir/IR/MLIRContext.h"  // from @llvm-project
+#include "mlir/Pass/Pass.h"  // from @llvm-project
+#include "mlir/Support/LogicalResult.h"  // from @llvm-project
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
 #include "tensorflow/compiler/mlir/tosa/transforms/legalize_common.h"
 
diff --git a/tensorflow/compiler/mlir/tosa/transforms/legalize_common.cc b/tensorflow/compiler/mlir/tosa/transforms/legalize_common.cc
index 084efbf077a..9f987cad3b2 100644
--- a/tensorflow/compiler/mlir/tosa/transforms/legalize_common.cc
+++ b/tensorflow/compiler/mlir/tosa/transforms/legalize_common.cc
@@ -14,18 +14,26 @@ limitations under the License.
 ==============================================================================*/
 
 // This file contains legalizations common to mapping both TensorFlow and
-// TensorFlow Lite to TOSA.
+// TensorFlow Lite to TOSA. It operates generically on ops and does not have
+// a hard reference on either dialect.
 //
 // Conversion functions return llvm::None on a legalization failure or a
 // legalized value on success.  Callers must check for presence of an
 // llvm::Optional value after each call.
 
 #include "tensorflow/compiler/mlir/tosa/transforms/legalize_common.h"
+
 #include <climits>
 #include <cstddef>
 #include <cstdint>
 #include <iterator>
 #include <numeric>
+
+#include "llvm/Support/FormatVariadic.h"
+#include "mlir/Dialect/Quant/QuantTypes.h"  // from @llvm-project
+#include "mlir/Dialect/Tosa/IR/TosaOps.h"  // from @llvm-project
+#include "mlir/IR/Matchers.h"  // from @llvm-project
+#include "mlir/IR/PatternMatch.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/tosa/transforms/legalize_utils.h"
 
 namespace mlir {
diff --git a/tensorflow/compiler/mlir/tosa/transforms/legalize_common.h b/tensorflow/compiler/mlir/tosa/transforms/legalize_common.h
index 06016bbfb3b..d5ef518f176 100644
--- a/tensorflow/compiler/mlir/tosa/transforms/legalize_common.h
+++ b/tensorflow/compiler/mlir/tosa/transforms/legalize_common.h
@@ -16,39 +16,17 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_MLIR_TOSA_TRANSFORMS_LEGALIZE_COMMON_H
 #define TENSORFLOW_COMPILER_MLIR_TOSA_TRANSFORMS_LEGALIZE_COMMON_H
 
+#include "mlir/IR/PatternMatch.h"  // from @llvm-project
+#include "mlir/Support/LLVM.h"  // from @llvm-project
+
 // This file contains legalizations common to mapping both TensorFlow and
 // TensorFlow Lite to TOSA.
 //
-// Conversion functions return nullptr on a lowerization failure or a lowered
-// operator on success.   Callers must check and return a LogicalResult failure
-// on nullptr.
+// Conversion functions return None on a failure or result value on success.
+// Callers must check and return a LogicalResult failure on None.
 //
 // For these functions, the framework-specific operands/attributes/defaults
 // are already extracted and placed in a common form for lowering.
-#include "mlir/Dialect/Quant/FakeQuantSupport.h"
-#include "mlir/Dialect/Quant/UniformSupport.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/Dialect/Tosa/IR/TosaOps.h"
-#include "mlir/Dialect/Tosa/Utils/QuantUtils.h"
-#include "mlir/IR/Attributes.h"
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/IR/Diagnostics.h"
-#include "mlir/IR/MLIRContext.h"
-#include "mlir/IR/Matchers.h"
-#include "mlir/IR/Operation.h"
-#include "mlir/IR/PatternMatch.h"
-#include "mlir/IR/StandardTypes.h"
-#include "mlir/IR/TypeUtilities.h"
-#include "mlir/IR/Types.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Support/LLVM.h"
-#include "mlir/Transforms/DialectConversion.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/Support/FormatVariadic.h"
 
 namespace mlir {
 namespace tosa {
diff --git a/tensorflow/compiler/mlir/tosa/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/tosa/transforms/legalize_tf.cc
index e24253b420c..1219e14eed5 100644
--- a/tensorflow/compiler/mlir/tosa/transforms/legalize_tf.cc
+++ b/tensorflow/compiler/mlir/tosa/transforms/legalize_tf.cc
@@ -21,30 +21,9 @@ limitations under the License.
 #include <iterator>
 #include <numeric>
 
-#include "mlir/Dialect/Quant/FakeQuantSupport.h"
-#include "mlir/Dialect/Quant/UniformSupport.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/Dialect/Tosa/IR/TosaOps.h"
-#include "mlir/Dialect/Tosa/Utils/QuantUtils.h"
-#include "mlir/IR/Attributes.h"
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/IR/Diagnostics.h"
-#include "mlir/IR/MLIRContext.h"
-#include "mlir/IR/Matchers.h"
-#include "mlir/IR/Operation.h"
-#include "mlir/IR/PatternMatch.h"
-#include "mlir/IR/StandardTypes.h"
-#include "mlir/IR/TypeUtilities.h"
-#include "mlir/IR/Types.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Support/LLVM.h"
-#include "mlir/Transforms/DialectConversion.h"
-#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringSwitch.h"
+#include "mlir/Dialect/Tosa/IR/TosaOps.h"  // from @llvm-project
+#include "mlir/Support/LLVM.h"  // from @llvm-project
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
 #include "tensorflow/compiler/mlir/tosa/transforms/legalize_common.h"
 #include "tensorflow/compiler/mlir/tosa/transforms/legalize_utils.h"
diff --git a/tensorflow/compiler/mlir/tosa/transforms/legalize_tfl.cc b/tensorflow/compiler/mlir/tosa/transforms/legalize_tfl.cc
index 2ae339dc6d4..4e51bd795b7 100644
--- a/tensorflow/compiler/mlir/tosa/transforms/legalize_tfl.cc
+++ b/tensorflow/compiler/mlir/tosa/transforms/legalize_tfl.cc
@@ -23,32 +23,9 @@ limitations under the License.
 #include <numeric>
 #include <unordered_set>
 
-#include "mlir/Dialect/Quant/FakeQuantSupport.h"
-#include "mlir/Dialect/Quant/QuantTypes.h"
-#include "mlir/Dialect/Quant/UniformSupport.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/Dialect/Tosa/IR/TosaOps.h"
-#include "mlir/Dialect/Tosa/Utils/QuantUtils.h"
-#include "mlir/IR/Attributes.h"
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/IR/Diagnostics.h"
-#include "mlir/IR/MLIRContext.h"
-#include "mlir/IR/Matchers.h"
-#include "mlir/IR/Operation.h"
-#include "mlir/IR/PatternMatch.h"
-#include "mlir/IR/StandardTypes.h"
-#include "mlir/IR/TypeUtilities.h"
-#include "mlir/IR/Types.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Support/LLVM.h"
-#include "mlir/Transforms/DialectConversion.h"
-#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringSwitch.h"
+#include "mlir/Dialect/Tosa/IR/TosaOps.h"  // from @llvm-project
+#include "mlir/Support/LLVM.h"  // from @llvm-project
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
 #include "tensorflow/compiler/mlir/tosa/transforms/legalize_common.h"
 #include "tensorflow/compiler/mlir/tosa/transforms/legalize_utils.h"
@@ -2996,5 +2973,6 @@ std::unique_ptr<OperationPass<FuncOp>> createLegalizeTFLPass() {
 
 static PassRegistration<LegalizeTFL> pass(
     PASS_NAME, "Legalize from TensorFlow Lite to TOSA dialect");
+
 }  // namespace tosa
 }  // namespace mlir
diff --git a/tensorflow/compiler/mlir/tosa/transforms/legalize_utils.cc b/tensorflow/compiler/mlir/tosa/transforms/legalize_utils.cc
index 5bae8eccf35..7280d4c23de 100644
--- a/tensorflow/compiler/mlir/tosa/transforms/legalize_utils.cc
+++ b/tensorflow/compiler/mlir/tosa/transforms/legalize_utils.cc
@@ -15,13 +15,14 @@ limitations under the License.
 
 #include "tensorflow/compiler/mlir/tosa/transforms/legalize_utils.h"
 
+#include "mlir/Dialect/Tosa/IR/TosaOps.h"  // from @llvm-project
+#include "mlir/Dialect/Tosa/Utils/QuantUtils.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
 #include "tensorflow/compiler/mlir/tosa/transforms/legalize_common.h"
 
 // Implements legalization and post-legalization optimization helper functions
 
 namespace mlir {
-
 namespace tosa {
 
 // Create a TOSA rescale op from TFLite scaling, zero points and rounding mode
diff --git a/tensorflow/compiler/mlir/tosa/transforms/legalize_utils.h b/tensorflow/compiler/mlir/tosa/transforms/legalize_utils.h
index 69671a6a7a5..f18e5733b8b 100644
--- a/tensorflow/compiler/mlir/tosa/transforms/legalize_utils.h
+++ b/tensorflow/compiler/mlir/tosa/transforms/legalize_utils.h
@@ -22,24 +22,10 @@ limitations under the License.
 #include <iterator>
 #include <numeric>
 
-#include "mlir/Dialect/Quant/FakeQuantSupport.h"
-#include "mlir/Dialect/Quant/UniformSupport.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/Dialect/Tosa/IR/TosaOps.h"
-#include "mlir/Dialect/Tosa/Utils/QuantUtils.h"
-#include "mlir/IR/Attributes.h"
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/IR/Diagnostics.h"
-#include "mlir/IR/MLIRContext.h"
-#include "mlir/IR/Matchers.h"
-#include "mlir/IR/Operation.h"
-#include "mlir/IR/PatternMatch.h"
-#include "mlir/IR/StandardTypes.h"
-#include "mlir/IR/TypeUtilities.h"
-#include "mlir/IR/Types.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Support/LLVM.h"
-#include "mlir/Transforms/DialectConversion.h"
+#include "mlir/IR/BuiltinAttributes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
+#include "mlir/IR/PatternMatch.h"  // from @llvm-project
+#include "mlir/Support/LLVM.h"  // from @llvm-project
 #include "tensorflow/core/framework/kernel_shape_util.h"
 #include "tensorflow/core/kernels/conv_grad_shape_utils.h"
 #include "tensorflow/core/util/padding.h"
diff --git a/tensorflow/compiler/mlir/tosa/transforms/passes.h b/tensorflow/compiler/mlir/tosa/transforms/passes.h
index f9449080ec0..69d4e923d20 100644
--- a/tensorflow/compiler/mlir/tosa/transforms/passes.h
+++ b/tensorflow/compiler/mlir/tosa/transforms/passes.h
@@ -18,15 +18,11 @@ limitations under the License.
 
 #include <memory>
 
-#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/Pass.h"  // from @llvm-project
 
 namespace mlir {
-
 namespace tosa {
 
-struct TOSALegalizationPipelineOptions
-    : public PassPipelineOptions<TOSALegalizationPipelineOptions> {};
-
 std::unique_ptr<OperationPass<FuncOp>> createLegalizeTFPass();
 std::unique_ptr<OperationPass<FuncOp>> createFuseBiasTFPass();
 std::unique_ptr<OperationPass<FuncOp>> createLegalizeTFLPass();
@@ -36,7 +32,6 @@ std::unique_ptr<OperationPass<FuncOp>> createConvertTFLUint8Pass();
 #include "tensorflow/compiler/mlir/tosa/transforms/passes.h.inc"
 
 }  // namespace tosa
-
 }  // namespace mlir
 
 #endif  // TENSORFLOW_COMPILER_MLIR_TOSA_TRANSFORMS_PASSES_H
diff --git a/tensorflow/compiler/mlir/tosa/transforms/register_passes.h b/tensorflow/compiler/mlir/tosa/transforms/register_passes.h
deleted file mode 100644
index 7d13205a42f..00000000000
--- a/tensorflow/compiler/mlir/tosa/transforms/register_passes.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_COMPILER_MLIR_TOSA_TRANSFORMS_REGISTER_PASSES_H
-#define TENSORFLOW_COMPILER_MLIR_TOSA_TRANSFORMS_REGISTER_PASSES_H
-
-#include "mlir/Dialect/Tosa/Transforms/Passes.h"
-#include "mlir/Pass/Pass.h"
-#include "tensorflow/compiler/mlir/tosa/transforms/passes.h"
-
-namespace mlir {
-namespace tosa {
-
-inline void registerAllTosaPasses() {
-  registerLegalizeTosaPasses();
-  registerTosaOptPasses();
-}
-
-}  // namespace tosa
-}  // namespace mlir
-
-#endif  // TENSORFLOW_COMPILER_MLIR_TOSA_TRANSFORMS_REGISTER_PASSES_H

From 347ff5466148043cf37987287bd23b8bb0b95491 Mon Sep 17 00:00:00 2001
From: River Riddle <riverriddle@google.com>
Date: Fri, 11 Dec 2020 18:51:01 -0800
Subject: [PATCH 17/60] [mlir][NFC] Replace usages of mlir/IR/StandardTypes.h
 with mlir/IR/BuiltinTypes.h

StandardTypes.h was moved to BuiltinTypes.h and is being removed.

PiperOrigin-RevId: 347115220
Change-Id: I356c3df3d78df896e4ef44b8c78e3dbf71990e67
---
 .../compiler/mlir/lite/transforms/default_quant_params.cc       | 2 +-
 tensorflow/compiler/mlir/lite/transforms/dense_to_sparse.cc     | 2 +-
 tensorflow/compiler/mlir/lite/transforms/dilated_conv.h         | 2 +-
 tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc         | 2 +-
 .../compiler/mlir/lite/transforms/lower_static_tensor_list.cc   | 2 +-
 tensorflow/compiler/mlir/lite/transforms/optimize.cc            | 2 +-
 .../compiler/mlir/lite/transforms/optimize_functional_ops.cc    | 2 +-
 .../mlir/lite/transforms/prepare_composite_functions_tf.cc      | 2 +-
 tensorflow/compiler/mlir/lite/transforms/prepare_quantize.cc    | 2 +-
 .../compiler/mlir/lite/transforms/prepare_quantize_lstm.h       | 2 +-
 tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc          | 2 +-
 tensorflow/compiler/mlir/lite/transforms/raise_custom_ops.cc    | 2 +-
 .../compiler/mlir/lite/transforms/split_merged_operands.cc      | 2 +-
 tensorflow/compiler/mlir/lite/transforms/while_loop_outline.cc  | 2 +-
 14 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/compiler/mlir/lite/transforms/default_quant_params.cc b/tensorflow/compiler/mlir/lite/transforms/default_quant_params.cc
index 451eb613543..ad5dbc69afc 100644
--- a/tensorflow/compiler/mlir/lite/transforms/default_quant_params.cc
+++ b/tensorflow/compiler/mlir/lite/transforms/default_quant_params.cc
@@ -17,8 +17,8 @@ limitations under the License.
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/Attributes.h"
+#include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/PatternMatch.h"
-#include "mlir/IR/StandardTypes.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Support/LLVM.h"
 #include "absl/memory/memory.h"
diff --git a/tensorflow/compiler/mlir/lite/transforms/dense_to_sparse.cc b/tensorflow/compiler/mlir/lite/transforms/dense_to_sparse.cc
index 90ef09bf7af..1dc06ca36cf 100644
--- a/tensorflow/compiler/mlir/lite/transforms/dense_to_sparse.cc
+++ b/tensorflow/compiler/mlir/lite/transforms/dense_to_sparse.cc
@@ -20,7 +20,7 @@ limitations under the License.
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
 #include "tensorflow/lite/tools/optimize/sparsity/format_converter.h"
diff --git a/tensorflow/compiler/mlir/lite/transforms/dilated_conv.h b/tensorflow/compiler/mlir/lite/transforms/dilated_conv.h
index 2cd11525bfa..f689dcd45b9 100644
--- a/tensorflow/compiler/mlir/lite/transforms/dilated_conv.h
+++ b/tensorflow/compiler/mlir/lite/transforms/dilated_conv.h
@@ -22,9 +22,9 @@ limitations under the License.
 
 #include "llvm/Support/Casting.h"
 #include "mlir/IR/Attributes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Matchers.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/lite/utils/validators.h"
diff --git a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc
index febbd7d2c83..f05884fc6eb 100644
--- a/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc
+++ b/tensorflow/compiler/mlir/lite/transforms/legalize_tf.cc
@@ -33,11 +33,11 @@ limitations under the License.
 #include "mlir/Dialect/Quant/QuantOps.h"  // from @llvm-project
 #include "mlir/Dialect/Quant/UniformSupport.h"  // from @llvm-project
 #include "mlir/IR/Attributes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Diagnostics.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
 #include "mlir/Support/LLVM.h"  // from @llvm-project
 #include "mlir/Support/LogicalResult.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc b/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc
index 3f35b06694a..ebe935dda9a 100644
--- a/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc
+++ b/tensorflow/compiler/mlir/lite/transforms/lower_static_tensor_list.cc
@@ -36,12 +36,12 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Block.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/Matchers.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
 #include "mlir/IR/OperationSupport.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/SymbolTable.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize.cc b/tensorflow/compiler/mlir/lite/transforms/optimize.cc
index 704cae9646e..1e258cdf7df 100644
--- a/tensorflow/compiler/mlir/lite/transforms/optimize.cc
+++ b/tensorflow/compiler/mlir/lite/transforms/optimize.cc
@@ -37,8 +37,8 @@ limitations under the License.
 #include "llvm/Support/raw_ostream.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/Attributes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Matchers.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/lite/transforms/optimize_functional_ops.cc b/tensorflow/compiler/mlir/lite/transforms/optimize_functional_ops.cc
index 665fae3d8fc..eb449728946 100644
--- a/tensorflow/compiler/mlir/lite/transforms/optimize_functional_ops.cc
+++ b/tensorflow/compiler/mlir/lite/transforms/optimize_functional_ops.cc
@@ -20,8 +20,8 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/BlockAndValueMapping.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
 #include "mlir/Support/LogicalResult.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc
index eee0378dc6e..eff26680abb 100644
--- a/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc
+++ b/tensorflow/compiler/mlir/lite/transforms/prepare_composite_functions_tf.cc
@@ -27,11 +27,11 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Identifier.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/SymbolTable.h"  // from @llvm-project
 #include "mlir/IR/Visitors.h"  // from @llvm-project
 #include "mlir/Interfaces/CallInterfaces.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_quantize.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_quantize.cc
index 2c1deb33e17..dca02c18704 100644
--- a/tensorflow/compiler/mlir/lite/transforms/prepare_quantize.cc
+++ b/tensorflow/compiler/mlir/lite/transforms/prepare_quantize.cc
@@ -30,10 +30,10 @@ limitations under the License.
 #include "mlir/Dialect/Quant/QuantOps.h"  // from @llvm-project
 #include "mlir/Dialect/Quant/QuantTypes.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_quantize_lstm.h b/tensorflow/compiler/mlir/lite/transforms/prepare_quantize_lstm.h
index 253c94c2b1d..07423d7c221 100644
--- a/tensorflow/compiler/mlir/lite/transforms/prepare_quantize_lstm.h
+++ b/tensorflow/compiler/mlir/lite/transforms/prepare_quantize_lstm.h
@@ -30,9 +30,9 @@ limitations under the License.
 #include "mlir/Dialect/Quant/QuantTypes.h"  // from @llvm-project
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/Attributes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/OpDefinition.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
 #include "mlir/Support/LogicalResult.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc b/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc
index 10a4a698e7f..3012ec8082d 100644
--- a/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc
+++ b/tensorflow/compiler/mlir/lite/transforms/prepare_tf.cc
@@ -45,8 +45,8 @@ limitations under the License.
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
 #include "mlir/Support/LLVM.h"  // from @llvm-project
 #include "mlir/Support/LogicalResult.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/lite/transforms/raise_custom_ops.cc b/tensorflow/compiler/mlir/lite/transforms/raise_custom_ops.cc
index 40cca526951..c7fa7d40416 100644
--- a/tensorflow/compiler/mlir/lite/transforms/raise_custom_ops.cc
+++ b/tensorflow/compiler/mlir/lite/transforms/raise_custom_ops.cc
@@ -18,8 +18,8 @@ limitations under the License.
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
 #include "tensorflow/compiler/mlir/lite/transforms/passes.h"
diff --git a/tensorflow/compiler/mlir/lite/transforms/split_merged_operands.cc b/tensorflow/compiler/mlir/lite/transforms/split_merged_operands.cc
index af9f21aaa2a..a58b7a3edad 100644
--- a/tensorflow/compiler/mlir/lite/transforms/split_merged_operands.cc
+++ b/tensorflow/compiler/mlir/lite/transforms/split_merged_operands.cc
@@ -23,11 +23,11 @@ limitations under the License.
 #include "mlir/IR/Block.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/Matchers.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
 #include "mlir/IR/OperationSupport.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/SymbolTable.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/lite/transforms/while_loop_outline.cc b/tensorflow/compiler/mlir/lite/transforms/while_loop_outline.cc
index cfa2efe5191..91d5379fb8e 100644
--- a/tensorflow/compiler/mlir/lite/transforms/while_loop_outline.cc
+++ b/tensorflow/compiler/mlir/lite/transforms/while_loop_outline.cc
@@ -19,11 +19,11 @@ limitations under the License.
 #include "llvm/Support/CommandLine.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Identifier.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/Matchers.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/SymbolTable.h"  // from @llvm-project
 #include "mlir/Pass/Pass.h"  // from @llvm-project
 #include "mlir/Transforms/RegionUtils.h"  // from @llvm-project

From 6b16f6292642ce421b1c9a9784f51e8f565946e9 Mon Sep 17 00:00:00 2001
From: River Riddle <riverriddle@google.com>
Date: Fri, 11 Dec 2020 18:56:41 -0800
Subject: [PATCH 18/60] [mlir][NFC] Replace usages of mlir/IR/StandardTypes.h
 with mlir/IR/BuiltinTypes.h

StandardTypes.h was moved to BuiltinTypes.h and is being removed.

PiperOrigin-RevId: 347115670
Change-Id: I66f0932fa6327ea2648b7bb2f42acfc3cdb0743a
---
 tensorflow/compiler/mlir/lite/utils/attribute_utils.cc          | 2 +-
 tensorflow/compiler/mlir/lite/utils/constant_utils.h            | 2 +-
 tensorflow/compiler/mlir/lite/utils/convert_type.cc             | 2 +-
 tensorflow/compiler/mlir/lite/utils/lstm_utils.cc               | 2 +-
 tensorflow/compiler/mlir/lite/utils/lstm_utils.h                | 2 +-
 tensorflow/compiler/mlir/lite/utils/lstm_utils_test.cc          | 2 +-
 tensorflow/compiler/mlir/lite/utils/tftext_utils.cc             | 2 +-
 tensorflow/compiler/mlir/lite/utils/tftext_utils.h              | 2 +-
 tensorflow/compiler/mlir/lite/utils/validators.h                | 2 +-
 .../mlir/tools/kernel_gen/transforms/buffer_reuse_pass.cc       | 2 +-
 .../compiler/mlir/tools/kernel_gen/transforms/bufferize.cc      | 2 +-
 .../compiler/mlir/tools/kernel_gen/transforms/bufferize_pass.cc | 2 +-
 .../mlir/tools/kernel_gen/transforms/embed_memref_prints.cc     | 2 +-
 .../mlir/tools/kernel_gen/transforms/embed_tf_framework.cc      | 2 +-
 .../mlir/tools/kernel_gen/transforms/same_shape_propagation.cc  | 2 +-
 .../transforms/tensorflow_abi_knowledge_propagation.cc          | 2 +-
 .../kernel_gen/transforms/tf_framework_legalize_to_llvm.cc      | 2 +-
 tensorflow/compiler/mlir/xla/hlo_function_importer.cc           | 2 +-
 tensorflow/compiler/mlir/xla/hlo_function_importer.h            | 2 +-
 tensorflow/compiler/mlir/xla/hlo_module_importer.cc             | 2 +-
 tensorflow/compiler/mlir/xla/hlo_utils.cc                       | 2 +-
 tensorflow/compiler/mlir/xla/hlo_utils.h                        | 2 +-
 tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc                 | 2 +-
 tensorflow/compiler/mlir/xla/type_to_shape.cc                   | 2 +-
 tensorflow/compiler/mlir/xla/type_to_shape_test.cc              | 2 +-
 25 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/tensorflow/compiler/mlir/lite/utils/attribute_utils.cc b/tensorflow/compiler/mlir/lite/utils/attribute_utils.cc
index 2085517b652..41a7cd113c9 100644
--- a/tensorflow/compiler/mlir/lite/utils/attribute_utils.cc
+++ b/tensorflow/compiler/mlir/lite/utils/attribute_utils.cc
@@ -14,7 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include "mlir/IR/BuiltinAttributes.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 
 namespace mlir {
 namespace TFL {
diff --git a/tensorflow/compiler/mlir/lite/utils/constant_utils.h b/tensorflow/compiler/mlir/lite/utils/constant_utils.h
index 5c348021b5e..0434cf714a3 100644
--- a/tensorflow/compiler/mlir/lite/utils/constant_utils.h
+++ b/tensorflow/compiler/mlir/lite/utils/constant_utils.h
@@ -17,10 +17,10 @@ limitations under the License.
 #define TENSORFLOW_COMPILER_MLIR_LITE_UTILS_CONSTANT_UTILS_H_
 
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "tensorflow/stream_executor/lib/statusor.h"
 
 namespace mlir {
diff --git a/tensorflow/compiler/mlir/lite/utils/convert_type.cc b/tensorflow/compiler/mlir/lite/utils/convert_type.cc
index 56aac8fc217..489b0f32f8d 100644
--- a/tensorflow/compiler/mlir/lite/utils/convert_type.cc
+++ b/tensorflow/compiler/mlir/lite/utils/convert_type.cc
@@ -16,7 +16,7 @@ limitations under the License.
 #include "tensorflow/compiler/mlir/lite/utils/convert_type.h"
 
 #include "mlir/IR/Builders.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h"
diff --git a/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc b/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc
index 1a5e740756c..357079c561b 100644
--- a/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc
+++ b/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc
@@ -25,12 +25,12 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Identifier.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/OpDefinition.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
 #include "mlir/Support/LLVM.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/lite/utils/lstm_utils.h b/tensorflow/compiler/mlir/lite/utils/lstm_utils.h
index 449c473831a..6fc01198702 100644
--- a/tensorflow/compiler/mlir/lite/utils/lstm_utils.h
+++ b/tensorflow/compiler/mlir/lite/utils/lstm_utils.h
@@ -22,8 +22,8 @@ limitations under the License.
 #include "llvm/ADT/StringRef.h"
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
 #include "mlir/Support/LogicalResult.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
diff --git a/tensorflow/compiler/mlir/lite/utils/lstm_utils_test.cc b/tensorflow/compiler/mlir/lite/utils/lstm_utils_test.cc
index 9eb767c6cd9..688e2cede59 100644
--- a/tensorflow/compiler/mlir/lite/utils/lstm_utils_test.cc
+++ b/tensorflow/compiler/mlir/lite/utils/lstm_utils_test.cc
@@ -28,9 +28,9 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
 #include "mlir/Support/LLVM.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc b/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc
index f71445344f3..98b183291bd 100644
--- a/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc
+++ b/tensorflow/compiler/mlir/lite/utils/tftext_utils.cc
@@ -26,13 +26,13 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Identifier.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/Matchers.h"  // from @llvm-project
 #include "mlir/IR/OpDefinition.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
 #include "mlir/Support/LLVM.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/lite/utils/tftext_utils.h b/tensorflow/compiler/mlir/lite/utils/tftext_utils.h
index 82938d58306..60a954dc1aa 100644
--- a/tensorflow/compiler/mlir/lite/utils/tftext_utils.h
+++ b/tensorflow/compiler/mlir/lite/utils/tftext_utils.h
@@ -22,8 +22,8 @@ limitations under the License.
 #include "llvm/ADT/StringRef.h"
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
 #include "mlir/Support/LogicalResult.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
diff --git a/tensorflow/compiler/mlir/lite/utils/validators.h b/tensorflow/compiler/mlir/lite/utils/validators.h
index d7a56fae1a6..f73a75c7d8c 100644
--- a/tensorflow/compiler/mlir/lite/utils/validators.h
+++ b/tensorflow/compiler/mlir/lite/utils/validators.h
@@ -20,7 +20,7 @@ limitations under the License.
 #define TENSORFLOW_COMPILER_MLIR_LITE_UTILS_VALIDATORS_H_
 
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 
 namespace mlir {
 namespace TFL {
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/buffer_reuse_pass.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/buffer_reuse_pass.cc
index 26184fa59b1..51e489310b4 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/buffer_reuse_pass.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/buffer_reuse_pass.cc
@@ -26,8 +26,8 @@ limitations under the License.
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/AffineMap.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h"
 #include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h"
 #include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h"
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize.cc
index 3a92192b4b6..9935f6b20b7 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize.cc
@@ -21,7 +21,7 @@ limitations under the License.
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/BlockAndValueMapping.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/Transforms/DialectConversion.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewriters.h"
 
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize_pass.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize_pass.cc
index 2985e6b3e5c..78d6f9687a4 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize_pass.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize_pass.cc
@@ -28,10 +28,10 @@ limitations under the License.
 #include "mlir/Dialect/StandardOps/Transforms/FuncConversions.h"  // from @llvm-project
 #include "mlir/Dialect/StandardOps/Transforms/Passes.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/Visitors.h"  // from @llvm-project
 #include "mlir/Transforms/Bufferize.h"  // from @llvm-project
 #include "mlir/Transforms/DialectConversion.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/embed_memref_prints.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/embed_memref_prints.cc
index a295c055cac..a1fbf7b5325 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/embed_memref_prints.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/embed_memref_prints.cc
@@ -19,8 +19,8 @@ limitations under the License.
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/AffineMap.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/Transforms/DialectConversion.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h"
 #include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h"
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/embed_tf_framework.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/embed_tf_framework.cc
index d363355b176..e5e9997f996 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/embed_tf_framework.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/embed_tf_framework.cc
@@ -15,7 +15,7 @@ limitations under the License.
 
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeRange.h"  // from @llvm-project
 #include "mlir/Transforms/DialectConversion.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h"
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/same_shape_propagation.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/same_shape_propagation.cc
index 01661416dbb..6153afe6d5d 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/same_shape_propagation.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/same_shape_propagation.cc
@@ -29,8 +29,8 @@ limitations under the License.
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"  // from @llvm-project
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/AsmState.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
 #include "mlir/Support/LLVM.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h"
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tensorflow_abi_knowledge_propagation.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tensorflow_abi_knowledge_propagation.cc
index beda2a9c14e..e6ad17573b4 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tensorflow_abi_knowledge_propagation.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tensorflow_abi_knowledge_propagation.cc
@@ -25,8 +25,8 @@ limitations under the License.
 #include "mlir/Dialect/GPU/GPUDialect.h"  // from @llvm-project
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"  // from @llvm-project
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/Support/LLVM.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h"
 #include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h"
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm.cc
index 03b6636b2a5..88c3bc2e6b7 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/tf_framework_legalize_to_llvm.cc
@@ -19,8 +19,8 @@ limitations under the License.
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/Transforms/DialectConversion.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h"
 #include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewriters.h"
diff --git a/tensorflow/compiler/mlir/xla/hlo_function_importer.cc b/tensorflow/compiler/mlir/xla/hlo_function_importer.cc
index 23fab366d85..341132d96f5 100644
--- a/tensorflow/compiler/mlir/xla/hlo_function_importer.cc
+++ b/tensorflow/compiler/mlir/xla/hlo_function_importer.cc
@@ -26,10 +26,10 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/BlockAndValueMapping.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Identifier.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/Region.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
 #include "tensorflow/compiler/mlir/tensorflow/utils/error_util.h"
 #include "tensorflow/compiler/mlir/xla/attribute_importer.h"
diff --git a/tensorflow/compiler/mlir/xla/hlo_function_importer.h b/tensorflow/compiler/mlir/xla/hlo_function_importer.h
index d849b83c8b1..99fc64f40ba 100644
--- a/tensorflow/compiler/mlir/xla/hlo_function_importer.h
+++ b/tensorflow/compiler/mlir/xla/hlo_function_importer.h
@@ -23,8 +23,8 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
 #include "tensorflow/compiler/mlir/tensorflow/utils/error_util.h"
 #include "tensorflow/compiler/xla/comparison_util.h"
diff --git a/tensorflow/compiler/mlir/xla/hlo_module_importer.cc b/tensorflow/compiler/mlir/xla/hlo_module_importer.cc
index 9db5861934f..b554f38b148 100644
--- a/tensorflow/compiler/mlir/xla/hlo_module_importer.cc
+++ b/tensorflow/compiler/mlir/xla/hlo_module_importer.cc
@@ -17,9 +17,9 @@ limitations under the License.
 
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/Attributes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/OperationSupport.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
 #include "tensorflow/compiler/mlir/xla/hlo_function_importer.h"
diff --git a/tensorflow/compiler/mlir/xla/hlo_utils.cc b/tensorflow/compiler/mlir/xla/hlo_utils.cc
index 51e00bcf9de..16aaec05658 100644
--- a/tensorflow/compiler/mlir/xla/hlo_utils.cc
+++ b/tensorflow/compiler/mlir/xla/hlo_utils.cc
@@ -19,7 +19,7 @@ limitations under the License.
 
 #include "mlir/IR/AffineMap.h"  // from @llvm-project
 #include "mlir/IR/Attributes.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/lhlo_ops.h"
 #include "tensorflow/compiler/xla/literal.h"
diff --git a/tensorflow/compiler/mlir/xla/hlo_utils.h b/tensorflow/compiler/mlir/xla/hlo_utils.h
index 3ad39aec3be..88b775b7c59 100644
--- a/tensorflow/compiler/mlir/xla/hlo_utils.h
+++ b/tensorflow/compiler/mlir/xla/hlo_utils.h
@@ -20,7 +20,7 @@ limitations under the License.
 
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
 #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/convert_op_folder.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
diff --git a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc
index 34345fe7f3d..2e58bf23c49 100644
--- a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc
+++ b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc
@@ -31,11 +31,11 @@ limitations under the License.
 #include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/Matchers.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeUtilities.h"  // from @llvm-project
 #include "mlir/IR/UseDefLists.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
diff --git a/tensorflow/compiler/mlir/xla/type_to_shape.cc b/tensorflow/compiler/mlir/xla/type_to_shape.cc
index 3822e10089b..049e5418948 100644
--- a/tensorflow/compiler/mlir/xla/type_to_shape.cc
+++ b/tensorflow/compiler/mlir/xla/type_to_shape.cc
@@ -18,9 +18,9 @@ limitations under the License.
 #include <string>
 
 #include "mlir/IR/AffineMap.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Diagnostics.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/Support/DebugStringHelper.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
 #include "tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.h"
diff --git a/tensorflow/compiler/mlir/xla/type_to_shape_test.cc b/tensorflow/compiler/mlir/xla/type_to_shape_test.cc
index 97417748b64..bb6361183bf 100644
--- a/tensorflow/compiler/mlir/xla/type_to_shape_test.cc
+++ b/tensorflow/compiler/mlir/xla/type_to_shape_test.cc
@@ -18,8 +18,8 @@ limitations under the License.
 #include <iostream>
 
 #include "mlir/IR/Builders.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/xla/hlo_utils.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/test.h"

From 8515e61584fa13dc3ed1b0d2bc92a6d882658203 Mon Sep 17 00:00:00 2001
From: River Riddle <riverriddle@google.com>
Date: Fri, 11 Dec 2020 19:00:36 -0800
Subject: [PATCH 19/60] [mlir][NFC] Replace usages of mlir/IR/StandardTypes.h
 with mlir/IR/BuiltinTypes.h

StandardTypes.h was moved to BuiltinTypes.h and is being removed.

PiperOrigin-RevId: 347115952
Change-Id: I2d91f7ef1f94131794b0a3ade6c1f098b623501b
---
 .../compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h  | 2 +-
 .../mlir/hlo/include/mlir-hlo/utils/convert_op_folder.h         | 2 +-
 tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/hlo_utils.h | 2 +-
 tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc    | 2 +-
 tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc     | 2 +-
 .../compiler/mlir/hlo/lib/Dialect/mhlo/IR/lhlo_gpu_ops.cc       | 2 +-
 tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/lhlo_ops.cc    | 2 +-
 tensorflow/compiler/mlir/lite/flatbuffer_export.cc              | 2 +-
 tensorflow/compiler/mlir/lite/flatbuffer_import.cc              | 2 +-
 tensorflow/compiler/mlir/lite/flatbuffer_operator.cc            | 2 +-
 tensorflow/compiler/mlir/lite/flatbuffer_translate.cc           | 2 +-
 tensorflow/compiler/mlir/tfr/ir/tfr_ops.cc                      | 2 +-
 tensorflow/compiler/mlir/tfr/ir/tfr_ops.h                       | 2 +-
 tensorflow/compiler/mlir/tfr/ir/tfr_types.h                     | 2 +-
 14 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h
index 1c57073f4ab..7059d95afb4 100644
--- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h
+++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/broadcast_utils.h
@@ -21,8 +21,8 @@ limitations under the License.
 
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Location.h"
-#include "mlir/IR/StandardTypes.h"
 #include "mlir/Interfaces/InferTypeOpInterface.h"
 #include "mlir/Support/LLVM.h"
 
diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/convert_op_folder.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/convert_op_folder.h
index 39e0acf57ad..5e41fd026a9 100644
--- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/convert_op_folder.h
+++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/convert_op_folder.h
@@ -17,7 +17,7 @@ limitations under the License.
 #define TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_UTILS_CONVERT_OP_FOLDER_H_
 
 #include "mlir/IR/BuiltinAttributes.h"
-#include "mlir/IR/StandardTypes.h"
+#include "mlir/IR/BuiltinTypes.h"
 
 namespace mlir {
 namespace hlo {
diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/hlo_utils.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/hlo_utils.h
index 5513dc60bb4..602ca968d2d 100644
--- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/hlo_utils.h
+++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/hlo_utils.h
@@ -18,8 +18,8 @@ limitations under the License.
 
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/PatternMatch.h"
-#include "mlir/IR/StandardTypes.h"
 #include "mlir/IR/TypeUtilities.h"
 
 namespace mlir {
diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc
index 7ea42c6f806..60c944837f5 100644
--- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc
+++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/chlo_ops.cc
@@ -19,9 +19,9 @@ limitations under the License.
 #include "mlir-hlo/utils/broadcast_utils.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Diagnostics.h"
 #include "mlir/IR/PatternMatch.h"
-#include "mlir/IR/StandardTypes.h"
 #include "mlir/IR/TypeUtilities.h"
 
 namespace mlir {
diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc
index 082e202fa10..24049eee9b5 100644
--- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc
+++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/hlo_ops.cc
@@ -42,6 +42,7 @@ limitations under the License.
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/Location.h"
 #include "mlir/IR/MLIRContext.h"
@@ -51,7 +52,6 @@ limitations under the License.
 #include "mlir/IR/Operation.h"
 #include "mlir/IR/OperationSupport.h"
 #include "mlir/IR/PatternMatch.h"
-#include "mlir/IR/StandardTypes.h"
 #include "mlir/IR/TypeUtilities.h"
 #include "mlir/IR/Types.h"
 #include "mlir/IR/Value.h"
diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/lhlo_gpu_ops.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/lhlo_gpu_ops.cc
index 10c5c0c2f9d..572cc43eab1 100644
--- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/lhlo_gpu_ops.cc
+++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/lhlo_gpu_ops.cc
@@ -31,6 +31,7 @@ limitations under the License.
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/Location.h"
 #include "mlir/IR/MLIRContext.h"
@@ -39,7 +40,6 @@ limitations under the License.
 #include "mlir/IR/Operation.h"
 #include "mlir/IR/OperationSupport.h"
 #include "mlir/IR/PatternMatch.h"
-#include "mlir/IR/StandardTypes.h"
 #include "mlir/IR/TypeUtilities.h"
 #include "mlir/IR/Types.h"
 #include "mlir/IR/Value.h"
diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/lhlo_ops.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/lhlo_ops.cc
index 126eda0c790..f4ca3a1ea97 100644
--- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/lhlo_ops.cc
+++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/IR/lhlo_ops.cc
@@ -32,6 +32,7 @@ limitations under the License.
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/Location.h"
 #include "mlir/IR/MLIRContext.h"
@@ -40,7 +41,6 @@ limitations under the License.
 #include "mlir/IR/Operation.h"
 #include "mlir/IR/OperationSupport.h"
 #include "mlir/IR/PatternMatch.h"
-#include "mlir/IR/StandardTypes.h"
 #include "mlir/IR/TypeUtilities.h"
 #include "mlir/IR/Types.h"
 #include "mlir/IR/Value.h"
diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_export.cc b/tensorflow/compiler/mlir/lite/flatbuffer_export.cc
index fdf27cb4118..b9a1c4737d3 100644
--- a/tensorflow/compiler/mlir/lite/flatbuffer_export.cc
+++ b/tensorflow/compiler/mlir/lite/flatbuffer_export.cc
@@ -47,10 +47,10 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
 #include "mlir/Support/LogicalResult.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_import.cc b/tensorflow/compiler/mlir/lite/flatbuffer_import.cc
index 0e0e2164014..1a5e6ea630a 100644
--- a/tensorflow/compiler/mlir/lite/flatbuffer_import.cc
+++ b/tensorflow/compiler/mlir/lite/flatbuffer_import.cc
@@ -51,12 +51,12 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Diagnostics.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
 #include "mlir/IR/OperationSupport.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
 #include "mlir/Support/LLVM.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_operator.cc b/tensorflow/compiler/mlir/lite/flatbuffer_operator.cc
index 60fd1160be2..df9ddaf7599 100644
--- a/tensorflow/compiler/mlir/lite/flatbuffer_operator.cc
+++ b/tensorflow/compiler/mlir/lite/flatbuffer_operator.cc
@@ -23,7 +23,7 @@ limitations under the License.
 #include "llvm/ADT/StringSwitch.h"
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_types.h"
 #include "tensorflow/compiler/xla/statusor.h"
diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_translate.cc b/tensorflow/compiler/mlir/lite/flatbuffer_translate.cc
index bcd3243d4ad..901199e4bee 100644
--- a/tensorflow/compiler/mlir/lite/flatbuffer_translate.cc
+++ b/tensorflow/compiler/mlir/lite/flatbuffer_translate.cc
@@ -23,10 +23,10 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/IR/Value.h"  // from @llvm-project
 #include "mlir/Support/LogicalResult.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tfr/ir/tfr_ops.cc b/tensorflow/compiler/mlir/tfr/ir/tfr_ops.cc
index 2c59e64ee83..e1ef506ba1f 100644
--- a/tensorflow/compiler/mlir/tfr/ir/tfr_ops.cc
+++ b/tensorflow/compiler/mlir/tfr/ir/tfr_ops.cc
@@ -31,6 +31,7 @@ limitations under the License.
 #include "mlir/IR/Attributes.h"  // from @llvm-project
 #include "mlir/IR/Builders.h"  // from @llvm-project
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/DialectImplementation.h"  // from @llvm-project
 #include "mlir/IR/FunctionImplementation.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
@@ -38,7 +39,6 @@ limitations under the License.
 #include "mlir/IR/OpDefinition.h"  // from @llvm-project
 #include "mlir/IR/OpImplementation.h"  // from @llvm-project
 #include "mlir/IR/PatternMatch.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/Support/LogicalResult.h"  // from @llvm-project
 #include "mlir/Transforms/InliningUtils.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tfr/ir/tfr_ops.h b/tensorflow/compiler/mlir/tfr/ir/tfr_ops.h
index afc7eaded17..673219897cb 100644
--- a/tensorflow/compiler/mlir/tfr/ir/tfr_ops.h
+++ b/tensorflow/compiler/mlir/tfr/ir/tfr_ops.h
@@ -18,11 +18,11 @@ limitations under the License.
 
 #include "llvm/ADT/StringSet.h"
 #include "mlir/IR/BuiltinOps.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Dialect.h"  // from @llvm-project
 #include "mlir/IR/DialectImplementation.h"  // from @llvm-project
 #include "mlir/IR/FunctionSupport.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/Interfaces/CallInterfaces.h"  // from @llvm-project
 #include "mlir/Interfaces/ControlFlowInterfaces.h"  // from @llvm-project
diff --git a/tensorflow/compiler/mlir/tfr/ir/tfr_types.h b/tensorflow/compiler/mlir/tfr/ir/tfr_types.h
index 4bda8f34658..b27d56e98f0 100644
--- a/tensorflow/compiler/mlir/tfr/ir/tfr_types.h
+++ b/tensorflow/compiler/mlir/tfr/ir/tfr_types.h
@@ -17,10 +17,10 @@ limitations under the License.
 #define TENSORFLOW_COMPILER_MLIR_TFR_IR_TFR_TYPES_H_
 
 #include "mlir/IR/Attributes.h"  // from @llvm-project
+#include "mlir/IR/BuiltinTypes.h"  // from @llvm-project
 #include "mlir/IR/Location.h"  // from @llvm-project
 #include "mlir/IR/MLIRContext.h"  // from @llvm-project
 #include "mlir/IR/Operation.h"  // from @llvm-project
-#include "mlir/IR/StandardTypes.h"  // from @llvm-project
 #include "mlir/IR/TypeSupport.h"  // from @llvm-project
 #include "mlir/IR/Types.h"  // from @llvm-project
 

From 564f72aef83e959cc5cb5e306f7cd888de9223fc Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 11 Dec 2020 19:59:15 -0800
Subject: [PATCH 20/60] [XLA:SPMD] Fix concatenate at partition dim when last
 operand needs more padding than temp out.

PiperOrigin-RevId: 347120543
Change-Id: Ib327f7d41d30259466c729671515481517d39281
---
 .../xla/service/spmd/spmd_partitioner.cc       | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc
index 3bca04377a1..d541cee1c01 100644
--- a/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc
+++ b/tensorflow/compiler/xla/service/spmd/spmd_partitioner.cc
@@ -1422,9 +1422,25 @@ Status SpmdPartitioningVisitor::HandleConcatenate(HloInstruction* hlo) {
   // temp_output_shape is the output shape where the concatenate dimension
   // is changed to the full (and padded to shard count) dimension size.
   auto temp_output_shape = MakePartitionedShape(hlo->shape(), sharding);
+  auto last_operand_padded_shape =
+      MakePartitionedShape(hlo->operands().back()->shape(), sharding);
+  // If the last operand needs more padding than temp_output has, add the extra
+  // padding so that the dynamic-update-slice does not go out of bounds.
+  int last_operand_padding =
+      last_operand_padded_shape.dimensions(dimension) *
+          sharding.tile_assignment().dim(dimension) -
+      hlo->operands().back()->shape().dimensions(dimension);
+  int temp_output_padding = temp_output_shape.dimensions(dimension) *
+                                sharding.tile_assignment().dim(dimension) -
+                            hlo->shape().dimensions(dimension);
+  int padding_for_last_operand =
+      last_operand_padding < temp_output_padding
+          ? 0
+          : last_operand_padding - temp_output_padding;
   temp_output_shape.set_dimensions(
       dimension, temp_output_shape.dimensions(dimension) *
-                     sharding.tile_assignment().dim(dimension));
+                         sharding.tile_assignment().dim(dimension) +
+                     padding_for_last_operand);
   auto temp_output = CreateZero(temp_output_shape, &b_);
 
   // Offset of each operand along the concatenate dimension.

From e7696790bb29bdd5701f69d4ca500d5a3d2e0734 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 12 Dec 2020 01:01:33 -0800
Subject: [PATCH 21/60] compat: Update forward compatibility horizon to
 2020-12-12

PiperOrigin-RevId: 347143074
Change-Id: I34661b77a4c94d6d15c772f9ac3dab3309e8fb2a
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 53b58bae5ae..1d9e6eb91df 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 12, 11)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 12, 12)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS"
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 

From 67a9764695429fc5963aeeb905526bfe5af45739 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 12 Dec 2020 01:01:35 -0800
Subject: [PATCH 22/60] Update GraphDef version to 613.

PiperOrigin-RevId: 347143077
Change-Id: Ib95c8699e5da14fec381dcc2f978d8390c12bafa
---
 tensorflow/core/public/version.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index 08548f04590..3ee2105821f 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -108,7 +108,7 @@ limitations under the License.
 
 #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 612  // Updated: 2020/12/11
+#define TF_GRAPH_DEF_VERSION 613  // Updated: 2020/12/12
 
 // Checkpoint compatibility versions (the versions field in SavedSliceMeta).
 //

From 894d16da3156e770c39cfc3e1321b6df9fdd49a1 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sat, 12 Dec 2020 22:49:36 -0800
Subject: [PATCH 23/60] Fix wrong TF documentation which cites
 "tf.profiler.ProfilerOptions", which should be
 "tf.profiler.experimental.ProfilerOptions".

PiperOrigin-RevId: 347225204
Change-Id: Ibcc40225f3dcb23c6fbf60d4dd55929f1dc96bfb
---
 tensorflow/python/profiler/profiler_v2.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/profiler/profiler_v2.py b/tensorflow/python/profiler/profiler_v2.py
index 102a510906b..2bd210a6e48 100644
--- a/tensorflow/python/profiler/profiler_v2.py
+++ b/tensorflow/python/profiler/profiler_v2.py
@@ -54,7 +54,7 @@ class ProfilerOptions(
     ])):
   """Options for finer control over the profiler.
 
-  Use `tf.profiler.ProfilerOptions` to control `tf.profiler`
+  Use `tf.profiler.experimental.ProfilerOptions` to control `tf.profiler`
   behavior.
 
   Fields:
@@ -204,8 +204,8 @@ class Profile(object):
 
     Args:
       logdir: profile data will save to this directory.
-      options: An optional tf.profiler.ProfilerOptions can be provided to fine
-        tune the profiler's behavior.
+      options: An optional `tf.profiler.experimental.ProfilerOptions` can be
+        provided to fine tune the profiler's behavior.
     """
     self._logdir = logdir
     self._options = options

From 90d6e2575433591a84239aee3b78e0fe20b58151 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 13 Dec 2020 01:01:41 -0800
Subject: [PATCH 24/60] Update GraphDef version to 614.

PiperOrigin-RevId: 347233123
Change-Id: I9027fb9a820d1a6af1ceceecea424fd861e8227c
---
 tensorflow/core/public/version.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index 3ee2105821f..fc3758e928d 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -108,7 +108,7 @@ limitations under the License.
 
 #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 613  // Updated: 2020/12/12
+#define TF_GRAPH_DEF_VERSION 614  // Updated: 2020/12/13
 
 // Checkpoint compatibility versions (the versions field in SavedSliceMeta).
 //

From 1987dba1a166d0336ccea8176dd2a16a65ddf19a Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 13 Dec 2020 01:01:42 -0800
Subject: [PATCH 25/60] compat: Update forward compatibility horizon to
 2020-12-13

PiperOrigin-RevId: 347233124
Change-Id: Ided819a9c60c33a4b880bf9d45100976d6259446
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index 1d9e6eb91df..d92c999c404 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 12, 12)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 12, 13)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS"
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 

From 92f07a46bbe49a44bbda5e6cf9b6c60630395986 Mon Sep 17 00:00:00 2001
From: Rohan Jain <rohanj@google.com>
Date: Sun, 13 Dec 2020 09:08:51 -0800
Subject: [PATCH 26/60] Removing run_deprecated_v1 and run_v1_only tags on
 tests for array_ops

PiperOrigin-RevId: 347263734
Change-Id: Iafc1e7ce0ac2bed4c7cbb2ac8bbed8a606a80ead
---
 .../python/kernel_tests/array_ops_test.py     | 953 +++++++++---------
 .../python/kernel_tests/v1_compat_tests/BUILD |  12 +
 .../v1_compat_tests/array_ops_test.py         |  88 ++
 3 files changed, 567 insertions(+), 486 deletions(-)
 create mode 100644 tensorflow/python/kernel_tests/v1_compat_tests/array_ops_test.py

diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py
index 006737f95d7..0e8548ce5cf 100644
--- a/tensorflow/python/kernel_tests/array_ops_test.py
+++ b/tensorflow/python/kernel_tests/array_ops_test.py
@@ -24,11 +24,11 @@ import unittest
 from absl.testing import parameterized
 import numpy as np
 
-from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.client import session
 from tensorflow.python.eager import backprop
 from tensorflow.python.eager import context
 from tensorflow.python.eager import def_function
+from tensorflow.python.framework import config
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -42,7 +42,6 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.ops import gradient_checker_v2
-from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import list_ops
 from tensorflow.python.ops import map_fn
@@ -142,60 +141,52 @@ class BooleanMaskTest(test_util.TensorFlowTestCase):
       masked_arr = arr[:, mask]
     elif axis == 2:
       masked_arr = arr[:, :, mask]
-    with self.cached_session():
-      masked_tensor = array_ops.boolean_mask(arr, mask, axis=axis)
+    masked_tensor = array_ops.boolean_mask(arr, mask, axis=axis)
 
-      # Leading dimension size of masked_tensor is always unknown until runtime
-      # since we don't how many elements will be kept.
-      leading = 1 if axis is None else axis + 1
-      self.assertAllEqual(masked_tensor.get_shape()[leading:],
-                          masked_arr.shape[leading:])
+    # Leading dimension size of masked_tensor is always unknown until runtime
+    # since we don't know how many elements will be kept.
+    leading = 1 if axis is None else axis + 1
+    self.assertAllEqual(masked_tensor.get_shape()[leading:],
+                        masked_arr.shape[leading:])
 
-      self.assertAllClose(masked_arr, masked_tensor)
+    self.assertAllClose(masked_arr, masked_tensor)
 
-  @test_util.run_deprecated_v1
   def testMaskDim1ArrDim2Axis1(self):
     ndims_mask = 1
     for arr_shape in [(1, 1), (2, 2), (2, 5)]:
       with self.subTest(arr_shape=arr_shape):
         self.CheckVersusNumpy(ndims_mask, arr_shape, axis=1)
 
-  @test_util.run_deprecated_v1
   def testMaskDim2ArrDim2Axis1(self):
     ndims_mask = 2
     for arr_shape in [(1, 1), (2, 2), (2, 5)]:
       with self.subTest(arr_shape=arr_shape):
         self.CheckVersusNumpy(ndims_mask, arr_shape, axis=1)
 
-  @test_util.run_deprecated_v1
   def testMaskDim1ArrDim1(self):
     ndims_mask = 1
     for arr_shape in [(1,), (2,), (3,), (10,)]:
       with self.subTest(arr_shape=arr_shape):
         self.CheckVersusNumpy(ndims_mask, arr_shape)
 
-  @test_util.run_deprecated_v1
   def testMaskDim1ArrDim2(self):
     ndims_mask = 1
     for arr_shape in [(1, 1), (2, 2), (2, 5)]:
       with self.subTest(arr_shape=arr_shape):
         self.CheckVersusNumpy(ndims_mask, arr_shape)
 
-  @test_util.run_deprecated_v1
   def testMaskDim2ArrDim2(self):
     ndims_mask = 2
     for arr_shape in [(1, 1), (2, 2), (2, 5)]:
       with self.subTest(arr_shape=arr_shape):
         self.CheckVersusNumpy(ndims_mask, arr_shape)
 
-  @test_util.run_deprecated_v1
   def testMaskDim2ArrDim3(self):
     ndims_mask = 2
     for arr_shape in [(1, 1, 1), (1, 2, 2), (2, 2, 1)]:
       with self.subTest(arr_shape=arr_shape):
         self.CheckVersusNumpy(ndims_mask, arr_shape)
 
-  @test_util.run_deprecated_v1
   def testEmptyInput2D(self):
     mask = np.array([True, False])
     arr = np.array([[], []]).astype(np.float32)
@@ -205,7 +196,6 @@ class BooleanMaskTest(test_util.TensorFlowTestCase):
     with self.cached_session():
       self.assertAllClose(numpy_result, tf_result)
 
-  @test_util.run_deprecated_v1
   def testEmptyInput1D(self):
     mask = np.array([]).astype(bool)
     arr = np.array([]).astype(np.float32)
@@ -215,7 +205,6 @@ class BooleanMaskTest(test_util.TensorFlowTestCase):
     with self.cached_session():
       self.assertAllClose(numpy_result, tf_result)
 
-  @test_util.run_deprecated_v1
   def testEmptyOutput(self):
     make_mask = lambda shape: np.zeros(shape, dtype=bool)
     for ndims_mask in range(1, 4):
@@ -225,71 +214,68 @@ class BooleanMaskTest(test_util.TensorFlowTestCase):
             arr_shape = np.random.randint(1, 5, size=ndims_arr)
             self.CheckVersusNumpy(ndims_mask, arr_shape, make_mask=make_mask)
 
-  @test_util.run_deprecated_v1
   def testWorksWithDimensionsEqualToNoneDuringGraphBuild(self):
     # The rank of the mask tensor must be specified. This is explained
     # in the docstring as well.
-    with self.cached_session() as sess:
-      ph_tensor = array_ops.placeholder(dtypes.int32, shape=None)
-      ph_mask = array_ops.placeholder(dtypes.bool, shape=[None])
+    @def_function.function
+    def func(ph_tensor, ph_mask):
+      return array_ops.boolean_mask(ph_tensor, ph_mask)
 
-      arr = np.array([[1, 2], [3, 4]])
-      mask = np.array([False, True])
+    f = func.get_concrete_function(
+        tensor_spec.TensorSpec(None, dtypes.int32),
+        tensor_spec.TensorSpec([None], dtypes.bool))
+    arr = np.array([[1, 2], [3, 4]], np.int32)
+    mask = np.array([False, True])
+    masked_tensor = f(arr, mask)
+    self.assertAllEqual(masked_tensor, arr[mask])
 
-      masked_tensor = sess.run(
-          array_ops.boolean_mask(ph_tensor, ph_mask),
-          feed_dict={
-              ph_tensor: arr,
-              ph_mask: mask
-          })
-      np.testing.assert_allclose(masked_tensor, arr[mask])
-
-  @test_util.run_deprecated_v1
   def testMaskDimensionsSetToNoneRaises(self):
     # The rank of the mask tensor must be specified. This is explained
     # in the docstring as well.
-    with self.cached_session():
-      tensor = array_ops.placeholder(dtypes.int32, shape=[None, 2])
-      mask = array_ops.placeholder(dtypes.bool, shape=None)
-      with self.assertRaisesRegex(ValueError, "dimensions must be specified"):
-        array_ops.boolean_mask(tensor, mask)
+    @def_function.function
+    def func(tensor, mask):
+      return array_ops.boolean_mask(tensor, mask)
+
+    with self.assertRaisesRegex(ValueError, "dimensions must be specified"):
+      _ = func.get_concrete_function(
+          tensor_spec.TensorSpec([None, 2], dtypes.int32),
+          tensor_spec.TensorSpec(None, dtypes.bool))
 
   def testMaskHasMoreDimsThanTensorRaises(self):
     mask = [[True, True], [False, False]]
     tensor = [1, 2, 3, 4]
     with self.cached_session():
       with self.assertRaisesRegex(ValueError, "incompatible"):
-        array_ops.boolean_mask(tensor, mask).eval()
+        self.evaluate(array_ops.boolean_mask(tensor, mask))
 
   def testMaskIsScalarRaises(self):
     mask = True
     tensor = 1
     with self.cached_session():
       with self.assertRaisesRegex(ValueError, "mask.*scalar"):
-        array_ops.boolean_mask(tensor, mask).eval()
+        self.evaluate(array_ops.boolean_mask(tensor, mask))
 
   def testMaskShapeDifferentThanFirstPartOfTensorShapeRaises(self):
     mask = [True, True, True]
     tensor = [[1, 2], [3, 4]]
     with self.cached_session():
       with self.assertRaisesRegex(ValueError, "incompatible"):
-        array_ops.boolean_mask(tensor, mask).eval()
+        self.evaluate(array_ops.boolean_mask(tensor, mask))
 
-  @test_util.run_deprecated_v1
   def testStringMask(self):
     # Reproduces b/111171330, where the optimized boolean_mask graph would
     # be incorrectly placed on GPU.
-    with ops.Graph().as_default():
-      tile_placeholder = array_ops.placeholder(dtypes.int32, [2])
-      string_tensor = array_ops.tile([["hello"]], tile_placeholder)
-      bool_tensor = array_ops.tile([[True]], tile_placeholder)
+    config.set_optimizer_experimental_options({"shape_optimization": True})
+
+    @def_function.function
+    def func(tile_input):
+      string_tensor = array_ops.tile([["hello"]], tile_input)
+      bool_tensor = array_ops.tile([[True]], tile_input)
       masked_tensor = array_ops.boolean_mask(string_tensor, bool_tensor)
-      config = config_pb2.ConfigProto()
-      config.graph_options.rewrite_options.shape_optimization = 1
-      config.gpu_options.per_process_gpu_memory_fraction = 0.3
-      with session.Session(config=config) as sess:
-        result = sess.run(masked_tensor, feed_dict={tile_placeholder: [2, 2]})
-        self.assertAllEqual([b"hello", b"hello", b"hello", b"hello"], result)
+      return masked_tensor
+
+    result = func([2, 2])
+    self.assertAllEqual([b"hello", b"hello", b"hello", b"hello"], result)
 
   def testMaskWithAxisTensor(self):
 
@@ -350,13 +336,12 @@ class OperatorShapeTest(test_util.TensorFlowTestCase):
 
 class ReverseV2Test(test_util.TensorFlowTestCase):
 
-  @test_util.run_deprecated_v1
   def testReverse0DimAuto(self):
     x_np = 4
     for use_gpu in [False, True]:
       with self.subTest(use_gpu=use_gpu):
         with self.cached_session(use_gpu=use_gpu):
-          x_tf = array_ops.reverse_v2(x_np, []).eval()
+          x_tf = self.evaluate(array_ops.reverse_v2(x_np, []))
           self.assertAllEqual(x_tf, x_np)
 
   def _reverse1DimAuto(self, np_dtype):
@@ -365,10 +350,10 @@ class ReverseV2Test(test_util.TensorFlowTestCase):
     for use_gpu in [False, True]:
       for axis_dtype in [dtypes.int32, dtypes.int64]:
         with self.subTest(use_gpu=use_gpu, axis_dtype=axis_dtype):
-          with self.cached_session(use_gpu=use_gpu):
-            x_tf = array_ops.reverse_v2(
-                x_np, constant_op.constant([0], dtype=axis_dtype)).eval()
-            self.assertAllEqual(x_tf, np.asarray(x_np)[::-1])
+          x_tf = self.evaluate(
+              array_ops.reverse_v2(x_np,
+                                   constant_op.constant([0], dtype=axis_dtype)))
+          self.assertAllEqual(x_tf, np.asarray(x_np)[::-1])
 
   def _reverse2DimAuto(self, np_dtype):
     x_np = np.array([[1, 200, 3], [4, 5, 60]], dtype=np_dtype)
@@ -378,38 +363,34 @@ class ReverseV2Test(test_util.TensorFlowTestCase):
         for axis_dtype in [dtypes.int32, dtypes.int64]:
           with self.subTest(
               reverse_f=reverse_f, use_gpu=use_gpu, axis_dtype=axis_dtype):
-            with self.cached_session(use_gpu=use_gpu):
-              x_tf_1 = reverse_f(x_np,
-                                 constant_op.constant([0],
-                                                      dtype=axis_dtype)).eval()
-              x_tf_2 = reverse_f(x_np,
-                                 constant_op.constant([-2],
-                                                      dtype=axis_dtype)).eval()
-              x_tf_3 = reverse_f(x_np,
-                                 constant_op.constant([1],
-                                                      dtype=axis_dtype)).eval()
-              x_tf_4 = reverse_f(x_np,
-                                 constant_op.constant([-1],
-                                                      dtype=axis_dtype)).eval()
-              x_tf_5 = reverse_f(x_np,
-                                 constant_op.constant([1, 0],
-                                                      dtype=axis_dtype)).eval()
-              self.assertAllEqual(x_tf_1, np.asarray(x_np)[::-1, :])
-              self.assertAllEqual(x_tf_2, np.asarray(x_np)[::-1, :])
-              self.assertAllEqual(x_tf_3, np.asarray(x_np)[:, ::-1])
-              self.assertAllEqual(x_tf_4, np.asarray(x_np)[:, ::-1])
-              self.assertAllEqual(x_tf_5, np.asarray(x_np)[::-1, ::-1])
+            x_tf_1 = self.evaluate(
+                reverse_f(x_np, constant_op.constant([0], dtype=axis_dtype)))
+            x_tf_2 = self.evaluate(
+                reverse_f(x_np, constant_op.constant([-2], dtype=axis_dtype)))
+            x_tf_3 = self.evaluate(
+                reverse_f(x_np, constant_op.constant([1], dtype=axis_dtype)))
+            x_tf_4 = self.evaluate(
+                reverse_f(x_np, constant_op.constant([-1], dtype=axis_dtype)))
+            x_tf_5 = self.evaluate(
+                reverse_f(x_np, constant_op.constant([1, 0], dtype=axis_dtype)))
+            self.assertAllEqual(x_tf_1, np.asarray(x_np)[::-1, :])
+            self.assertAllEqual(x_tf_2, np.asarray(x_np)[::-1, :])
+            self.assertAllEqual(x_tf_3, np.asarray(x_np)[:, ::-1])
+            self.assertAllEqual(x_tf_4, np.asarray(x_np)[:, ::-1])
+            self.assertAllEqual(x_tf_5, np.asarray(x_np)[::-1, ::-1])
 
   # This test covers the axis validation in the shape function
   # (no eval())
-  @test_util.run_deprecated_v1
   def testInvalidAxis(self):
     x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
-    with self.assertRaisesRegex(ValueError, "is out of valid range"):
+    with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError),
+                                "is out of valid range"):
       array_ops.reverse_v2(x_np, [-30])
-    with self.assertRaisesRegex(ValueError, "is out of valid range"):
+    with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError),
+                                "is out of valid range"):
       array_ops.reverse_v2(x_np, [2])
-    with self.assertRaisesRegex(ValueError, "axis 0 specified more than once"):
+    with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError),
+                                "axis 0 specified more than once"):
       array_ops.reverse_v2(x_np, [0, -2])
 
   # This is the version of reverse that uses axis indices rather than
@@ -418,24 +399,24 @@ class ReverseV2Test(test_util.TensorFlowTestCase):
   #
   # Note: this test passes placeholder as constant axis is validated
   # in shape function (see testInvalidAxis)
-  @test_util.run_deprecated_v1
   def testInvalid(self):
     x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
-    axis = array_ops.placeholder(dtypes.int32)
-    with self.cached_session():
-      with self.assertRaisesRegex(errors_impl.InvalidArgumentError,
-                                  "is out of.*range"):
-        array_ops.reverse_v2(x_np, axis).eval(feed_dict={axis: [-30]})
-      with self.assertRaisesRegex(errors_impl.InvalidArgumentError,
-                                  "is out of.*range"):
-        array_ops.reverse_v2(x_np, axis).eval(feed_dict={axis: [2]})
-      with self.assertRaisesRegex(
-          errors_impl.InvalidArgumentError,
-          "(axis 0 specified more than once|canonicalized axis 0 was repeated.)"
-      ):
-        array_ops.reverse_v2(x_np, axis).eval(feed_dict={axis: [0, -2]})
 
-  @test_util.run_deprecated_v1
+    @def_function.function
+    def func(ax):
+      return array_ops.reverse_v2(x_np, ax)
+
+    with self.assertRaisesRegex((ValueError, errors_impl.InvalidArgumentError),
+                                "is out of.*range"):
+      func([-30])
+    with self.assertRaisesRegex((ValueError, errors_impl.InvalidArgumentError),
+                                "is out of.*range"):
+      func([2])
+    with self.assertRaisesRegex(
+        (ValueError, errors_impl.InvalidArgumentError),
+        "(axis 0 specified more than once|canonicalized axis 0 was repeated.)"):
+      func([0, -2])
+
   def testReverse1DimAuto(self):
     for dtype in [
         np.uint8, np.int8, np.uint16, np.int16, np.int32, np.int64, np.bool,
@@ -444,7 +425,6 @@ class ReverseV2Test(test_util.TensorFlowTestCase):
     ]:
       self._reverse1DimAuto(dtype)
 
-  @test_util.run_deprecated_v1
   def testReverse2DimAuto(self):
     for dtype in [
         np.uint8, np.int8, np.uint16, np.int16, np.int32, np.int64, np.bool,
@@ -453,75 +433,54 @@ class ReverseV2Test(test_util.TensorFlowTestCase):
     ]:
       self._reverse2DimAuto(dtype)
 
-  @test_util.run_deprecated_v1
-  def testUnknownDims(self):
-    reverse_v2 = array_ops.reverse_v2
-    data_t = array_ops.placeholder(dtypes.float32)
-    axis_known_t = array_ops.placeholder(dtypes.int32, shape=[3])
-    reverse_known_t = reverse_v2(data_t, axis_known_t)
-    # Unlike V1 we cannot know this anymore
-    self.assertEqual(None, reverse_known_t.get_shape().ndims)
-
-    axis_unknown_t = array_ops.placeholder(dtypes.int32)
-    reverse_unknown_t = reverse_v2(data_t, axis_unknown_t)
-    self.assertIs(None, reverse_unknown_t.get_shape().ndims)
-
-    data_2d_t = array_ops.placeholder(dtypes.float32, shape=[None, None])
-    axis_2d_t = array_ops.placeholder(dtypes.int32, shape=[3])
-    reverse_2d_t = reverse_v2(data_2d_t, axis_2d_t)
-    self.assertEqual(2, reverse_2d_t.get_shape().ndims)
-
-  @test_util.run_deprecated_v1
   def testReverseRowsOf3Channels(self):
     """Tests optimized code for reversing rows with last dim size = 3."""
-    with self.session(use_gpu=True):
-      for reverse_f in [array_ops.reverse_v2, array_ops.reverse]:
-        for outer_size in (1, 2):
-          for middle_size in list(range(50)) + [100000]:
-            with self.subTest(
-                reverse_f=reverse_f,
-                outer_size=outer_size,
-                middle_size=middle_size):
-              x_np = np.reshape(
-                  np.arange(outer_size * middle_size * 3, dtype=np.float32),
-                  newshape=(outer_size, middle_size, 3))
-              x_tf = reverse_f(x_np, [1]).eval()
-              np_answer = x_np[:, ::-1, :]
-              self.assertAllEqual(x_tf, np_answer)
+    for reverse_f in [array_ops.reverse_v2, array_ops.reverse]:
+      for outer_size in (1, 2):
+        for middle_size in list(range(50)) + [100000]:
+          with self.subTest(
+              reverse_f=reverse_f,
+              outer_size=outer_size,
+              middle_size=middle_size,
+              use_gpu=True):
+            x_np = np.reshape(
+                np.arange(outer_size * middle_size * 3, dtype=np.float32),
+                newshape=(outer_size, middle_size, 3))
+            x_tf = self.evaluate(reverse_f(x_np, [1]))
+            np_answer = x_np[:, ::-1, :]
+            self.assertAllEqual(x_tf, np_answer)
 
-  @test_util.run_deprecated_v1
   def testReverseRowsOf4Channels(self):
-    with self.session(use_gpu=True):
-      for reverse_f in [array_ops.reverse_v2, array_ops.reverse]:
-        for outer_size in (1, 2):
-          for middle_size in list(range(50)) + [100000]:
-            with self.subTest(
-                reverse_f=reverse_f,
-                outer_size=outer_size,
-                middle_size=middle_size):
-              x_np = np.reshape(
-                  np.arange(outer_size * middle_size * 4, dtype=np.float32),
-                  newshape=(outer_size, middle_size, 4))
-              x_tf = reverse_f(x_np, [1]).eval()
-              np_answer = x_np[:, ::-1, :]
-              self.assertAllEqual(x_tf, np_answer)
+    for reverse_f in [array_ops.reverse_v2, array_ops.reverse]:
+      for outer_size in (1, 2):
+        for middle_size in list(range(50)) + [100000]:
+          with self.subTest(
+              reverse_f=reverse_f,
+              outer_size=outer_size,
+              middle_size=middle_size,
+              use_gpu=True):
+            x_np = np.reshape(
+                np.arange(outer_size * middle_size * 4, dtype=np.float32),
+                newshape=(outer_size, middle_size, 4))
+            x_tf = self.evaluate(reverse_f(x_np, [1]))
+            np_answer = x_np[:, ::-1, :]
+            self.assertAllEqual(x_tf, np_answer)
 
-  @test_util.run_deprecated_v1
   def testReverseColumnsOf3Channels(self):
-    with self.session(use_gpu=True):
-      for reverse_f in [array_ops.reverse_v2, array_ops.reverse]:
-        for outer_size in list(range(50)) + [100000]:
-          for middle_size in (1, 2):
-            with self.subTest(
-                reverse_f=reverse_f,
-                outer_size=outer_size,
-                middle_size=middle_size):
-              x_np = np.reshape(
-                  np.arange(outer_size * middle_size * 3, dtype=np.float32),
-                  newshape=(outer_size, middle_size, 3))
-              x_tf = reverse_f(x_np, [0]).eval()
-              np_answer = x_np[::-1, :, :]
-              self.assertAllEqual(x_tf, np_answer)
+    for reverse_f in [array_ops.reverse_v2, array_ops.reverse]:
+      for outer_size in list(range(50)) + [100000]:
+        for middle_size in (1, 2):
+          with self.subTest(
+              reverse_f=reverse_f,
+              outer_size=outer_size,
+              middle_size=middle_size,
+              use_gpu=True):
+            x_np = np.reshape(
+                np.arange(outer_size * middle_size * 3, dtype=np.float32),
+                newshape=(outer_size, middle_size, 3))
+            x_tf = self.evaluate(reverse_f(x_np, [0]))
+            np_answer = x_np[::-1, :, :]
+            self.assertAllEqual(x_tf, np_answer)
 
   def testReverseInvalidShape(self):
     x = np.ndarray(shape=[0, 1, 1])
@@ -548,12 +507,11 @@ class MeshgridTest(test_util.TensorFlowTestCase):
           x += 1j
         inputs.append(x)
       numpy_out = np.meshgrid(*inputs, indexing=index)
-      with self.cached_session(use_gpu=use_gpu):
+      with test_util.device(use_gpu=use_gpu):
         tf_out = array_ops.meshgrid(*inputs, indexing=index)
         for x_np, x_tf in zip(numpy_out, tf_out):
           self.assertAllEqual(x_np, x_tf)
 
-  @test_util.run_deprecated_v1
   def testCompare(self):
     for t in (np.float16, np.float32, np.float64, np.int32, np.int64,
               np.complex64, np.complex128):
@@ -592,15 +550,15 @@ class StridedSliceChecker(object):
 
     def eval_if_tensor(x):
       try:
-        return x.eval()
-      except AttributeError:
+        return self.test.evaluate(x)
+      except (AttributeError, TypeError, ValueError):
         return x
 
     if isinstance(spec, bool) or \
       (isinstance(spec, ops.Tensor) and spec.dtype == dtypes.bool) or \
       (isinstance(spec, np.ndarray) and spec.dtype == bool) or \
       (isinstance(spec, (list, tuple)) and np.asarray(spec).dtype == bool):
-      tensor = op.eval()
+      tensor = self.test.evaluate(op)
       np_spec = eval_if_tensor(spec)
       self.test.assertAllEqual(self.x_np[np_spec], tensor)
       return tensor
@@ -608,7 +566,7 @@ class StridedSliceChecker(object):
     if not isinstance(spec, (list, tuple)):
       spec = [spec]
 
-    tensor = op.eval()
+    tensor = self.test.evaluate(op)
 
     # Make a numpy spec that pre-evals the tensors
     np_specs = []
@@ -637,34 +595,32 @@ STRIDED_SLICE_TYPES = [
 class StridedSliceTest(test_util.TensorFlowTestCase):
   """Test the strided slice operation with variants of slices."""
 
-  @test_util.run_deprecated_v1
   def test_basic_slice(self):
     for tensor_type in STRIDED_SLICE_TYPES:
-      with self.subTest(tensor_type=tensor_type):
-        with self.cached_session(use_gpu=True):
-          checker = StridedSliceChecker(
-              self, StridedSliceChecker.REF_TENSOR, tensor_type=tensor_type)
-          _ = checker[:, :, :]
-          # Various ways of representing identity slice
-          _ = checker[:, :, :]
-          _ = checker[::, ::, ::]
-          _ = checker[::1, ::1, ::1]
-          # Not zero slice
-          _ = checker[::1, ::5, ::2]
-          # Reverse in each dimension independently
-          _ = checker[::-1, :, :]
-          _ = checker[:, ::-1, :]
-          _ = checker[:, :, ::-1]
-          ## negative index tests i.e. n-2 in first component
-          _ = checker[-2::-1, :, ::1]
-          # negative index tests i.e. n-2 in first component, non-unit stride
-          _ = checker[-2::-1, :, ::2]
+      with self.subTest(tensor_type=tensor_type, use_gpu=True):
+        checker = StridedSliceChecker(
+            self, StridedSliceChecker.REF_TENSOR, tensor_type=tensor_type)
+        _ = checker[:, :, :]
+        # Various ways of representing identity slice
+        _ = checker[:, :, :]
+        _ = checker[::, ::, ::]
+        _ = checker[::1, ::1, ::1]
+        # Not zero slice
+        _ = checker[::1, ::5, ::2]
+        # Reverse in each dimension independently
+        _ = checker[::-1, :, :]
+        _ = checker[:, ::-1, :]
+        _ = checker[:, :, ::-1]
+        ## negative index tests i.e. n-2 in first component
+        _ = checker[-2::-1, :, ::1]
+        # negative index tests i.e. n-2 in first component, non-unit stride
+        _ = checker[-2::-1, :, ::2]
 
-          # Check rank-0 examples
-          checker2 = StridedSliceChecker(self, 5, tensor_type=tensor_type)
-          _ = checker2[None]
-          _ = checker2[...]
-          _ = checker2[tuple()]
+        # Check rank-0 examples
+        checker2 = StridedSliceChecker(self, 5, tensor_type=tensor_type)
+        _ = checker2[None]
+        _ = checker2[...]
+        _ = checker2[tuple()]
 
   def testInt64GPU(self):
     if not test_util.is_gpu_available():
@@ -694,9 +650,8 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
       v = variables.Variable([1., 2.])
       v[0]  # pylint: disable=pointless-statement
 
-  @test_util.run_deprecated_v1
   def testDegenerateSlices(self):
-    with self.session(use_gpu=True):
+    with test_util.device(use_gpu=True):
       checker = StridedSliceChecker(self, StridedSliceChecker.REF_TENSOR)
       # degenerate by offering a forward interval with a negative stride
       _ = checker[0:-1:-1, :, :]
@@ -709,15 +664,13 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
                                     StridedSliceChecker.REF_TENSOR_ALIGNED)
       _ = checker[1:0]
 
-  @test_util.run_deprecated_v1
   def testSliceWithUndefinedDimension(self):
     t = constant_op.constant([1, 2, 3])
     d = tensor_shape.Dimension(None)
     self.assertAllEqual(t[d:d:d], t)
 
-  @test_util.run_deprecated_v1
   def testEllipsis(self):
-    with self.session(use_gpu=True):
+    with test_util.device(use_gpu=True):
       raw = [[[[[1, 2], [3, 4], [5, 6]]], [[[7, 8], [9, 10], [11, 12]]]]]
       checker = StridedSliceChecker(self, raw)
 
@@ -733,12 +686,12 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
       # ellipsis at middle
       _ = checker[0:1, ..., 0:1]
       # multiple ellipses not allowed
-      with self.assertRaisesRegex(ValueError, "Multiple ellipses"):
+      with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError),
+                                  "Multiple ellipses"):
         _ = checker[..., :, ...].eval()
 
-  @test_util.run_deprecated_v1
   def testShrink(self):
-    with self.session(use_gpu=True):
+    with test_util.device(use_gpu=True):
       raw = [[[[[1, 2, 4, 5], [5, 6, 7, 8], [9, 10, 11, 12]]],
               [[[13, 14, 15, 16], [17, 18, 19, 20], [21, 22, 23, 24]]]]]
       checker = StridedSliceChecker(self, raw)
@@ -747,17 +700,21 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
       _ = checker[:, 0]
       _ = checker[:, :, 0]
 
-  @test_util.run_deprecated_v1
   def testBothNewAxisAndShrink(self):
-    with self.session(use_gpu=True):
-      ones = array_ops.placeholder(shape=[2, 2], dtype=dtypes.int16)
-      self.assertAllEqual(
-          ones[array_ops.newaxis, :,
-               0].eval(feed_dict={ones: [[1, 1], [1, 1]]}), [[1, 1]])
+    with test_util.device(use_gpu=True):
+
+      @def_function.function
+      def func(inp):
+        return inp[array_ops.newaxis, :, 0]
+
+      f = func.get_concrete_function(
+          tensor_spec.TensorSpec([2, 2], dtypes.int16))
+
+      ones = constant_op.constant([[1, 1], [1, 1]], dtypes.int16)
+      self.assertAllEqual([[1, 1]], self.evaluate(f(ones)))
 
-  @test_util.run_deprecated_v1
   def testTensorIndexing(self):
-    with self.session(use_gpu=True):
+    with test_util.device(use_gpu=True):
       raw = [[[[[1, 2, 4, 5], [5, 6, 7, 8], [9, 10, 11, 12]]],
               [[[13, 14, 15, 16], [17, 18, 19, 20], [21, 22, 23, 24]]]]]
       checker = StridedSliceChecker(self, raw, check_type_infer=False)
@@ -785,9 +742,8 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
       with self.assertRaisesRegex(TypeError, expected):
         _ = checker[[2.1, -0.7, 1.5]]
 
-  @test_util.run_deprecated_v1
   def testExpand(self):
-    with self.session(use_gpu=True):
+    with test_util.device(use_gpu=True):
       raw = [[[[[1, 2, 4, 5], [5, 6, 7, 8], [9, 10, 11, 12]]],
               [[[13, 14, 15, 16], [17, 18, 19, 20], [21, 22, 23, 24]]]]]
       checker = StridedSliceChecker(self, raw)
@@ -803,18 +759,16 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
       # Ellipsis in middle of two newaxis
       _ = checker[np.newaxis, ..., np.newaxis]
 
-  @test_util.run_deprecated_v1
   def testExpandVariable(self):
-    with self.session(use_gpu=True):
+    with test_util.device(use_gpu=True):
       x = variables.Variable(7, dtype=dtypes.int32)
       self.evaluate(x.initializer)
-      y = x[None].eval()
+      y = self.evaluate(x[None])
       self.assertEqual(y.shape, (1,))
       self.assertAllEqual(y, (7,))
 
-  @test_util.run_deprecated_v1
   def testOptimizedCases(self):
-    with self.session(use_gpu=True):
+    with test_util.device(use_gpu=True):
       checker = StridedSliceChecker(self,
                                     StridedSliceChecker.REF_TENSOR_ALIGNED)
       # Identity
@@ -828,9 +782,8 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
       # First axis slice
       _ = checker[np.newaxis, 1:]
 
-  @test_util.run_v1_only("currently failing on v2")
   def testMasks(self):
-    with self.session(use_gpu=True):
+    with test_util.device(use_gpu=True):
       scalar = np.array(0)
       # Test tensor type mask
       checker = StridedSliceChecker(self, StridedSliceChecker.REF_TENSOR)
@@ -855,134 +808,225 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
       _ = checker2[ops.convert_to_tensor(mask)]
 
 
-class StridedSliceShapeChecker(object):
-
-  def __init__(self, x):
-    self.x = x
-
-  def __getitem__(self, spec):
-    op = self.x.__getitem__(spec)
-    return op.get_shape()
-
-
 class StridedSliceShapeTest(test_util.TensorFlowTestCase):
   """Test the shape inference of StridedSliceShapes."""
 
-  @test_util.run_deprecated_v1
   def testUnknown(self):
-    with self.session(use_gpu=True):
-      uncertain_tensor = array_ops.placeholder(dtypes.float32)
-      a = StridedSliceShapeChecker(uncertain_tensor)
-      a_slice_shape = a[...]
-      self.assertAllEqual(a_slice_shape.ndims, None)
+    with test_util.device(use_gpu=True):
+
+      @def_function.function
+      def f(x):
+        y = x[...]
+        self.assertAllEqual(y.get_shape().ndims, None)
+
+      _ = f.get_concrete_function(tensor_spec.TensorSpec(None, dtypes.float32))
 
   def tensorShapeEqual(self, x, y):
     self.assertTrue(x is not None and y is not None or x is None and y is None)
     self.assertEqual(x.as_list(), y.as_list())
 
-  @test_util.run_deprecated_v1
   def testTensorShapeUncertain(self):
-    with self.session(use_gpu=True):
-      uncertain_tensor = array_ops.placeholder(
-          dtypes.float32, shape=(5, None, 7))
-      a = StridedSliceShapeChecker(uncertain_tensor)
-      self.tensorShapeEqual(a[3:5], tensor_shape.TensorShape([2, None, 7]))
-      self.tensorShapeEqual(a[3:5, :, 4], tensor_shape.TensorShape([2, None]))
-      self.tensorShapeEqual(a[3:5, 3:4, 4], tensor_shape.TensorShape([2, None]))
-      self.tensorShapeEqual(a[3:5, :, 5:10],
-                            tensor_shape.TensorShape([2, None, 2]))
-      self.tensorShapeEqual(a[3:5, :, 50:3],
-                            tensor_shape.TensorShape([2, None, 0]))
-      self.tensorShapeEqual(a[3:5, :, array_ops.newaxis, 50:3,],
-                            tensor_shape.TensorShape([2, None, 1, 0]))
-      self.tensorShapeEqual(a[1:5:2, :, array_ops.newaxis, 50:3,],
-                            tensor_shape.TensorShape([2, None, 1, 0]))
-      self.tensorShapeEqual(a[:5:3, :, array_ops.newaxis, 50:3,],
-                            tensor_shape.TensorShape([2, None, 1, 0]))
-      self.tensorShapeEqual(a[:2:3, :, array_ops.newaxis, 50:3,],
-                            tensor_shape.TensorShape([1, None, 1, 0]))
-      self.tensorShapeEqual(a[::-1, :, array_ops.newaxis, ::-2],
-                            tensor_shape.TensorShape([5, None, 1, 4]))
+    with test_util.device(use_gpu=True):
+
+      @def_function.function
+      def f1(x):
+        y = x[3:5]
+        self.tensorShapeEqual(y.get_shape(),
+                              tensor_shape.TensorShape([2, None, 7]))
+
+      _ = f1.get_concrete_function(
+          tensor_spec.TensorSpec((5, None, 7), dtypes.float32))
+
+      @def_function.function
+      def f2(x):
+        y = x[3:5, :, 4]
+        self.tensorShapeEqual(y.get_shape(), tensor_shape.TensorShape([2,
+                                                                       None]))
+
+      _ = f2.get_concrete_function(
+          tensor_spec.TensorSpec((5, None, 7), dtypes.float32))
+
+      @def_function.function
+      def f3(x):
+        y = x[3:5, 3:4, 4]
+        self.tensorShapeEqual(y.get_shape(), tensor_shape.TensorShape([2,
+                                                                       None]))
+
+      _ = f3.get_concrete_function(
+          tensor_spec.TensorSpec((5, None, 7), dtypes.float32))
+
+      @def_function.function
+      def f4(x):
+        y = x[3:5, :, 5:10]
+        self.tensorShapeEqual(y.get_shape(),
+                              tensor_shape.TensorShape([2, None, 2]))
+
+      _ = f4.get_concrete_function(
+          tensor_spec.TensorSpec((5, None, 7), dtypes.float32))
+
+      @def_function.function
+      def f5(x):
+        y = x[3:5, :, 50:3]
+        self.tensorShapeEqual(y.get_shape(),
+                              tensor_shape.TensorShape([2, None, 0]))
+
+      _ = f5.get_concrete_function(
+          tensor_spec.TensorSpec((5, None, 7), dtypes.float32))
+
+      @def_function.function
+      def f6(x):
+        y = x[3:5, :, array_ops.newaxis, 50:3,]
+        self.tensorShapeEqual(y.get_shape(),
+                              tensor_shape.TensorShape([2, None, 1, 0]))
+
+      _ = f6.get_concrete_function(
+          tensor_spec.TensorSpec((5, None, 7), dtypes.float32))
+
+      @def_function.function
+      def f7(x):
+        y = x[1:5:2, :, array_ops.newaxis, 50:3,]
+        self.tensorShapeEqual(y.get_shape(),
+                              tensor_shape.TensorShape([2, None, 1, 0]))
+
+      _ = f7.get_concrete_function(
+          tensor_spec.TensorSpec((5, None, 7), dtypes.float32))
+
+      @def_function.function
+      def f8(x):
+        y = x[:5:3, :, array_ops.newaxis, 50:3,]
+        self.tensorShapeEqual(y.get_shape(),
+                              tensor_shape.TensorShape([2, None, 1, 0]))
+
+      _ = f8.get_concrete_function(
+          tensor_spec.TensorSpec((5, None, 7), dtypes.float32))
+
+      @def_function.function
+      def f9(x):
+        y = x[:2:3, :, array_ops.newaxis, 50:3,]
+        self.tensorShapeEqual(y.get_shape(),
+                              tensor_shape.TensorShape([1, None, 1, 0]))
+
+      _ = f9.get_concrete_function(
+          tensor_spec.TensorSpec((5, None, 7), dtypes.float32))
+
+      @def_function.function
+      def f10(x):
+        y = x[::-1, :, array_ops.newaxis, ::-2]
+        self.tensorShapeEqual(y.get_shape(),
+                              tensor_shape.TensorShape([5, None, 1, 4]))
+
+      _ = f10.get_concrete_function(
+          tensor_spec.TensorSpec((5, None, 7), dtypes.float32))
 
-  @test_util.run_deprecated_v1
   def testTensorValuedIndexShape(self):
     with self.session(use_gpu=True):
-      defined_shape_tensor = array_ops.placeholder(
-          dtypes.float32, shape=(5, 3, 7))
-      index_value = array_ops.placeholder(dtypes.int32, shape=())
-      a = StridedSliceShapeChecker(defined_shape_tensor)
-      self.tensorShapeEqual(a[index_value], tensor_shape.TensorShape([3, 7]))
-      self.tensorShapeEqual(a[index_value, ::-1],
-                            tensor_shape.TensorShape([3, 7]))
-      self.tensorShapeEqual(a[index_value, ::-2],
-                            tensor_shape.TensorShape([2, 7]))
-      other_scalar = array_ops.placeholder(dtypes.int32, shape=())
-      self.tensorShapeEqual(a[index_value, other_scalar:2],
-                            tensor_shape.TensorShape([None, 7]))
+
+      @def_function.function
+      def f1(x, y):
+        z = x[y]
+        self.tensorShapeEqual(z.get_shape(), tensor_shape.TensorShape([3, 7]))
+
+      _ = f1.get_concrete_function(
+          tensor_spec.TensorSpec((5, 3, 7)),
+          tensor_spec.TensorSpec((), dtypes.int32))
+
+      @def_function.function
+      def f2(x, y):
+        z = x[y, ::-1]
+        self.tensorShapeEqual(z.get_shape(), tensor_shape.TensorShape([3, 7]))
+
+      _ = f2.get_concrete_function(
+          tensor_spec.TensorSpec((5, 3, 7)),
+          tensor_spec.TensorSpec((), dtypes.int32))
+
+      @def_function.function
+      def f3(x, y):
+        z = x[y, ::-2]
+        self.tensorShapeEqual(z.get_shape(), tensor_shape.TensorShape([2, 7]))
+
+      _ = f3.get_concrete_function(
+          tensor_spec.TensorSpec((5, 3, 7)),
+          tensor_spec.TensorSpec((), dtypes.int32))
+
+      @def_function.function
+      def f4(x, y, s):
+        z = x[y, s:2]
+        self.tensorShapeEqual(z.get_shape(), tensor_shape.TensorShape([None,
+                                                                       7]))
+
+      _ = f4.get_concrete_function(
+          tensor_spec.TensorSpec((5, 3, 7)),
+          tensor_spec.TensorSpec((), dtypes.int32),
+          tensor_spec.TensorSpec((), dtypes.int32))
 
 
 class GradSliceChecker(object):
   """Tests that we can compute a gradient for var^2."""
 
-  def __init__(self, test, sess, var, varnp):
+  def __init__(self, test, var, varnp, use_tape):
     self.test = test
-    self.sess = sess
-    self.val = var * var
     self.var = var
     self.varnp = varnp
+    self.use_tape = use_tape
 
   def __getitem__(self, spec):
-    slice_var = self.var[spec]
-    slice_val = self.val[spec]
+    with test_util.AbstractGradientTape(
+        use_tape=self.use_tape, persistent=True) as tape:
+      tape.watch(self.var)
+      val = self.var * self.var
+      slice_var = self.var[spec]
+      slice_val = val[spec]
 
-    # compute analytic 2nd derivative
-    analytic_grad2 = 2 * slice_val
+      # compute analytic 2nd derivative
+      analytic_grad2 = 2 * slice_val
 
-    dy = variables.Variable(
-        array_ops.ones_like(slice_var, dtype=dtypes.float32))
-    assign = dy.assign(slice_var)
-    slice_val_grad, = gradients_impl.gradients(slice_val, self.var, grad_ys=dy)
-    slice_val_grad2, = gradients_impl.gradients(
-        slice_val_grad, dy, grad_ys=self.var)
-    self.sess.run(assign)
+      dy = variables.Variable(
+          array_ops.ones_like(slice_var, dtype=dtypes.float32))
+      assign = dy.assign(slice_var)
+
+      slice_val_grad = tape.gradient(slice_val, self.var, [dy])
+      slice_val_grad2 = tape.gradient(slice_val_grad, dy, [self.var])
+    self.test.evaluate(assign)
     slice_val_grad_evaled, slice_val_grad2_evaled = (
-        self.sess.run([slice_val_grad, slice_val_grad2]))
-    analytic_grad2_evaled = analytic_grad2.eval()
+        self.test.evaluate([slice_val_grad, slice_val_grad2]))
+    analytic_grad2_evaled = self.test.evaluate(analytic_grad2)
     self.test.assertAllEqual(slice_val_grad2_evaled, analytic_grad2_evaled)
 
     # compute analytic gradient for slice
     np_val_grad = (2 * self.varnp * self.varnp)
     np_sliceval_grad = np.zeros(self.var.get_shape())
     if isinstance(spec, ops.Tensor):
-      spec = self.sess.run([spec])
+      spec = self.test.evaluate([spec])
     np_sliceval_grad[spec] = np_val_grad[spec]
     # verify gradient
     self.test.assertAllEqual(slice_val_grad_evaled, np_sliceval_grad)
 
 
-class StridedSliceGradTest(test_util.TensorFlowTestCase):
+class StridedSliceGradTest(test_util.TensorFlowTestCase,
+                           parameterized.TestCase):
   """Test that strided slice's custom gradient produces correct gradients."""
 
-  @test_util.run_v1_only("b/120545219")
-  def testGradient(self):
-    with self.session(use_gpu=True) as sess:
+  @parameterized.parameters(set((True, context.executing_eagerly())))
+  def testGradient(self, use_tape):
+    with test_util.device(use_gpu=True):
       var = variables.Variable(
           array_ops.reshape(
               math_ops.range(1, 97, 1, dtype=dtypes.float32), shape=(6, 4, 4)))
-      init = variables.global_variables_initializer()
-      sess.run(init)
+      self.evaluate(var.initializer)
 
       raw = np.array(range(1, 97, 1)).reshape((6, 4, 4))
-      grad = GradSliceChecker(self, sess, var, raw)
+      grad = GradSliceChecker(self, var, raw, use_tape)
       _ = grad[2:6:2, 1:3, 1:3]
       _ = grad[3:0:-2, 1:3, 1:3]
       _ = grad[3:0:-2, array_ops.newaxis, 1:3, 2, array_ops.newaxis]
       _ = grad[3:0:-2, 1:3, 2]
       _ = grad[:, -1, :]
       _ = grad[:, -2, :]
-      with self.assertRaisesRegex(ValueError, "out of bounds"):
+      with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError),
+                                  "out of bounds"):
         _ = grad[:, -200, :]
-      with self.assertRaisesRegex(ValueError, "out of bounds"):
+      with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError),
+                                  "out of bounds"):
         _ = grad[:, 200, :]
 
       # Test numpy array type mask
@@ -990,75 +1034,64 @@ class StridedSliceGradTest(test_util.TensorFlowTestCase):
       # Test tensor type mask
       _ = grad[ops.convert_to_tensor(raw) <= 76]
 
-  @test_util.run_v1_only("b/120545219")
-  def testGradientZero(self):
-    with self.session(use_gpu=True) as sess:
+  @parameterized.parameters(set((True, context.executing_eagerly())))
+  def testGradientZero(self, use_tape):
+    with test_util.device(use_gpu=True):
       var = variables.Variable(8.)
-      init = variables.global_variables_initializer()
-      sess.run(init)
-      grad = GradSliceChecker(self, sess, var, np.array(8))
+      self.evaluate(var.initializer)
+      grad = GradSliceChecker(self, var, np.array(8), use_tape)
       _ = grad[tuple()]
 
-  @test_util.run_deprecated_v1
-  def testInt64Indices(self):
-    with self.session(use_gpu=True) as sess:
+  @parameterized.parameters(set((True, context.executing_eagerly())))
+  def testInt64Indices(self, use_tape):
+    with test_util.AbstractGradientTape(use_tape=use_tape) as tape:
       a = math_ops.range(3, dtype=dtypes.float32)
+      tape.watch(a)
       index = constant_op.constant(1, dtype=dtypes.int64)
       b = 2. * a[index]
-      grad, = gradients_impl.gradients(b, a)
-      self.assertAllEqual(self.evaluate(grad), [0., 2., 0.])
+    grad = tape.gradient(b, a)
+    self.assertAllEqual(self.evaluate(grad), [0., 2., 0.])
 
 
 class StridedSliceGradTypeTest(test_util.TensorFlowTestCase):
   """Test varied index types and host located memory."""
 
-  @test_util.run_deprecated_v1
   def testHostVsDevice(self):
-    with self.session(use_gpu=True) as sess:
-      var2 = variables.Variable(
-          array_ops.reshape(
-              math_ops.cast(math_ops.range(1, 5, 1), dtypes.float32),
-              shape=(4, 1, 1)))
-      varshape = variables.Variable([6, 4, 4], dtype=dtypes.int32)
-      self.evaluate(variables.global_variables_initializer())
-      begin = constant_op.constant([0, 0, 0])
-      end = constant_op.constant([4, 1, 1])
-      strides = constant_op.constant([1, 1, 1])
-      foo = array_ops.strided_slice_grad(varshape, begin, end, strides, var2)
-      sess.run(foo)
+    var2 = variables.Variable(
+        array_ops.reshape(
+            math_ops.cast(math_ops.range(1, 5, 1), dtypes.float32),
+            shape=(4, 1, 1)))
+    varshape = variables.Variable([6, 4, 4], dtype=dtypes.int32)
+    begin = constant_op.constant([0, 0, 0])
+    end = constant_op.constant([4, 1, 1])
+    strides = constant_op.constant([1, 1, 1])
+    foo = array_ops.strided_slice_grad(varshape, begin, end, strides, var2)
+    self.evaluate(var2.initializer)
+    self.evaluate(varshape.initializer)
+    self.evaluate(foo)
 
-  @test_util.run_deprecated_v1
   def testInt64Shape(self):
-    with self.session(use_gpu=True) as sess:
-      original_dy = array_ops.reshape(
-          math_ops.cast(math_ops.range(1, 5, 1), dtypes.float32),
-          shape=(4, 1, 1))
-      original_shape = constant_op.constant([6, 4, 4], dtype=dtypes.int64)
-      self.evaluate(variables.global_variables_initializer())
-      begin = constant_op.constant([0, 0, 0], dtype=dtypes.int64)
-      end = constant_op.constant([4, 1, 1], dtype=dtypes.int64)
-      strides = constant_op.constant([1, 1, 1], dtype=dtypes.int64)
+    original_dy = array_ops.reshape(
+        math_ops.cast(math_ops.range(1, 5, 1), dtypes.float32), shape=(4, 1, 1))
+    original_shape = constant_op.constant([6, 4, 4], dtype=dtypes.int64)
+    begin = constant_op.constant([0, 0, 0], dtype=dtypes.int64)
+    end = constant_op.constant([4, 1, 1], dtype=dtypes.int64)
+    strides = constant_op.constant([1, 1, 1], dtype=dtypes.int64)
+    dx = array_ops.strided_slice_grad(original_shape, begin, end, strides,
+                                      original_dy)
+    self.evaluate(dx)
+
+  def testMixedIndexTypes(self):
+    original_dy = array_ops.reshape(
+        math_ops.cast(math_ops.range(1, 5, 1), dtypes.float32), shape=(4, 1, 1))
+    original_shape = constant_op.constant([6, 4, 4], dtype=dtypes.int64)
+    begin = constant_op.constant([0, 0, 0], dtype=dtypes.int32)
+    end = constant_op.constant([4, 1, 1], dtype=dtypes.int64)
+    strides = constant_op.constant([1, 1, 1], dtype=dtypes.int64)
+    with self.assertRaises((TypeError, errors_impl.InvalidArgumentError)):
       dx = array_ops.strided_slice_grad(original_shape, begin, end, strides,
                                         original_dy)
-      sess.run(dx)
-
-  @test_util.run_deprecated_v1
-  def testMixedIndexTypes(self):
-    with self.session(use_gpu=True) as sess:
-      original_dy = array_ops.reshape(
-          math_ops.cast(math_ops.range(1, 5, 1), dtypes.float32),
-          shape=(4, 1, 1))
-      original_shape = constant_op.constant([6, 4, 4], dtype=dtypes.int64)
-      self.evaluate(variables.global_variables_initializer())
-      begin = constant_op.constant([0, 0, 0], dtype=dtypes.int32)
-      end = constant_op.constant([4, 1, 1], dtype=dtypes.int64)
-      strides = constant_op.constant([1, 1, 1], dtype=dtypes.int64)
-      with self.assertRaisesRegex(
-          TypeError, "Input 'begin' of 'StridedSliceGrad' Op has type int32"
-          " that does not match type int64 of argument 'shape'"):
-        dx = array_ops.strided_slice_grad(original_shape, begin, end, strides,
-                                          original_dy)
-        sess.run(dx)
+      self.evaluate(dx)
 
 
 class BenchmarkSlice(object):
@@ -1133,16 +1166,16 @@ class StridedSliceAssignChecker(object):
     if self.tensor_type.is_complex:
       value -= 1j * value
 
-    with self.test.test_session(use_gpu=True) as sess:
+    with test_util.device(use_gpu=True):
       if self._use_resource:
         var = resource_variable_ops.ResourceVariable(self.x)
       else:
         var = variables.Variable(self.x)
-      sess.run(variables.variables_initializer([var]))
-      val = sess.run(var[index].assign(value))
+      self.test.evaluate(var.initializer)
+      val = self.test.evaluate(var[index].assign(value))
       # val_copy is used to check that tf.compat.v1.assign works equivalently
       # to the assign method above.
-      val_copy = sess.run(state_ops.assign(var[index], value))
+      val_copy = self.test.evaluate(state_ops.assign(var[index], value))
       valnp = np.copy(self.x_np)
       valnp[index] = np.array(value)
       self.test.assertAllEqual(val, valnp)
@@ -1187,48 +1220,34 @@ class SliceAssignTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     checker2[...] = 6  # ellipsis
     checker2[None] = [6]  # new axis
 
-  @test_util.run_deprecated_v1
   @test_util.disable_xla("b/123559667")
   def testSliceAssign(self):
     self.doTestSliceAssign(use_resource=False)
 
-  @test_util.run_deprecated_v1
   @test_util.disable_xla("b/123559667")
   def testSliceAssignResource(self):
     self.doTestSliceAssign(use_resource=True)
 
-  @test_util.run_v1_only("b/120545219")
-  def testUninitialized(self):
-    with self.assertRaisesRegex(
-        errors.FailedPreconditionError,
-        "Attempting to use uninitialized value Variable"):
-      with self.cached_session() as sess:
-        v = variables.VariableV1([1, 2])
-        sess.run(v[:].assign([1, 2]))
-
-  @test_util.run_v1_only("b/120545219")
   def testTypeError(self):
     init_val = constant_op.constant([1, 2], dtype=dtypes.int32)
     too_small_val = constant_op.constant([3, 4], dtype=dtypes.int8)
     too_large_val = constant_op.constant([3, 4], dtype=dtypes.int64)
     v = variables.VariableV1(init_val)
-    with self.assertRaises(TypeError):
-      v[:].assign(too_small_val)
-    with self.assertRaises(TypeError):
-      v[:].assign(too_large_val)
+    with self.assertRaises((ValueError, TypeError)):
+      self.evaluate(v[:].assign(too_small_val))
+    with self.assertRaises((ValueError, TypeError)):
+      self.evaluate(v[:].assign(too_large_val))
 
-  @test_util.run_deprecated_v1
   def testTypeErrorResource(self):
     init_val = constant_op.constant([1, 2], dtype=dtypes.int32)
     too_small_val = constant_op.constant([3, 4], dtype=dtypes.int8)
     too_large_val = constant_op.constant([3, 4], dtype=dtypes.int64)
     v = resource_variable_ops.ResourceVariable(init_val)
-    with self.cached_session() as sess:
-      self.evaluate(v.initializer)
-      with self.assertRaises(ValueError):
-        sess.run(v[:].assign(too_large_val))
-      with self.assertRaises(ValueError):
-        sess.run(v[:].assign(too_small_val))
+    self.evaluate(v.initializer)
+    with self.assertRaises(ValueError):
+      self.evaluate(v[:].assign(too_large_val))
+    with self.assertRaises(ValueError):
+      self.evaluate(v[:].assign(too_small_val))
 
   @test_util.disable_xla("b/123559667")
   @test_util.run_in_graph_and_eager_modes
@@ -1326,61 +1345,46 @@ class SequenceMaskTest(test_util.TensorFlowTestCase):
       with self.assertRaisesRegex(ValueError, "maxlen must be scalar"):
         array_ops.sequence_mask([10, 20], [10, 20])
 
-  @test_util.run_deprecated_v1
   def testOneDimensionalWithMaxlen(self):
-    with self.cached_session():
-      res = array_ops.sequence_mask(constant_op.constant([1, 3, 2]), 5)
-      self.assertAllEqual(res.get_shape(), [3, 5])
-      self.assertAllEqual(
-          res,
-          [[True, False, False, False, False], [True, True, True, False, False],
-           [True, True, False, False, False]])
+    res = array_ops.sequence_mask(constant_op.constant([1, 3, 2]), 5)
+    self.assertAllEqual(res.get_shape(), [3, 5])
+    self.assertAllEqual(
+        res,
+        [[True, False, False, False, False], [True, True, True, False, False],
+         [True, True, False, False, False]])
 
-  @test_util.run_deprecated_v1
   def testOneDimensionalDtypeWithoutMaxlen(self):
-    with self.cached_session():
-      # test dtype and default maxlen:
-      res = array_ops.sequence_mask(
-          constant_op.constant([0, 1, 4]), dtype=dtypes.float32)
-      self.assertAllEqual(res.get_shape().as_list(), [3, 4])
-      self.assertAllEqual(
-          res,
-          [[0.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [1.0, 1.0, 1.0, 1.0]])
+    # test dtype and default maxlen:
+    res = array_ops.sequence_mask(
+        constant_op.constant([0, 1, 4]), dtype=dtypes.float32)
+    self.assertAllEqual(res.get_shape().as_list(), [3, 4])
+    self.assertAllEqual(
+        res, [[0.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [1.0, 1.0, 1.0, 1.0]])
 
-  @test_util.run_deprecated_v1
   def testOneDimensionalWithoutMaxlen(self):
-    with self.cached_session():
-      res = array_ops.sequence_mask(constant_op.constant([0, 1, 4]))
-      self.assertAllEqual(res.get_shape().as_list(), [3, 4])
-      self.assertAllEqual(
-          res, [[False, False, False, False], [True, False, False, False],
-                [True, True, True, True]])
+    res = array_ops.sequence_mask(constant_op.constant([0, 1, 4]))
+    self.assertAllEqual(res.get_shape().as_list(), [3, 4])
+    self.assertAllEqual(res,
+                        [[False, False, False, False],
+                         [True, False, False, False], [True, True, True, True]])
 
-  @test_util.run_deprecated_v1
   def testTwoDimensional(self):
-    with self.cached_session():
-      res = array_ops.sequence_mask(constant_op.constant([[1, 3, 2]]), 5)
-      self.assertAllEqual(res.get_shape(), [1, 3, 5])
-      self.assertAllEqual(res, [[[True, False, False, False, False],
-                                 [True, True, True, False, False],
-                                 [True, True, False, False, False]]])
+    res = array_ops.sequence_mask(constant_op.constant([[1, 3, 2]]), 5)
+    self.assertAllEqual(res.get_shape(), [1, 3, 5])
+    self.assertAllEqual(
+        res,
+        [[[True, False, False, False, False], [True, True, True, False, False],
+          [True, True, False, False, False]]])
 
-      # test dtype and default maxlen:
-      res = array_ops.sequence_mask(
-          constant_op.constant([[0, 1, 4], [1, 2, 3]]), dtype=dtypes.float32)
-      self.assertAllEqual(res.get_shape().as_list(), [2, 3, 4])
-      self.assertAllEqual(
-          res,
-          [[[0.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [1.0, 1.0, 1.0, 1.0]],
-           [[1.0, 0.0, 0.0, 0.0], [1.0, 1.0, 0.0, 0.0], [1.0, 1.0, 1.0, 0.0]]])
+    # test dtype and default maxlen:
+    res = array_ops.sequence_mask(
+        constant_op.constant([[0, 1, 4], [1, 2, 3]]), dtype=dtypes.float32)
+    self.assertAllEqual(res.get_shape().as_list(), [2, 3, 4])
+    self.assertAllEqual(
+        res,
+        [[[0.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [1.0, 1.0, 1.0, 1.0]],
+         [[1.0, 0.0, 0.0, 0.0], [1.0, 1.0, 0.0, 0.0], [1.0, 1.0, 1.0, 0.0]]])
 
-  @test_util.run_deprecated_v1
-  def testUnknownShape(self):
-    lengths = array_ops.placeholder(dtype=dtypes.int32)
-    res = array_ops.sequence_mask(lengths)
-    self.assertEqual(res.shape, None)
-
-  @test_util.run_deprecated_v1
   def testDtypes(self):
 
     def check_dtypes(lengths_dtype, maxlen_dtype):
@@ -1393,11 +1397,10 @@ class SequenceMaskTest(test_util.TensorFlowTestCase):
           [[True, False, False, False, False], [True, True, True, False, False],
            [True, True, False, False, False]])
 
-    with self.cached_session():
-      check_dtypes(dtypes.int32, dtypes.int32)
-      check_dtypes(dtypes.int32, dtypes.int64)
-      check_dtypes(dtypes.int64, dtypes.int32)
-      check_dtypes(dtypes.int64, dtypes.int64)
+    check_dtypes(dtypes.int32, dtypes.int32)
+    check_dtypes(dtypes.int32, dtypes.int64)
+    check_dtypes(dtypes.int64, dtypes.int32)
+    check_dtypes(dtypes.int64, dtypes.int64)
 
   def testOutputDtype(self):
 
@@ -1431,7 +1434,6 @@ class SequenceMaskTest(test_util.TensorFlowTestCase):
 class ConcatSliceResourceTest(test_util.TensorFlowTestCase):
 
   @test_util.run_in_graph_and_eager_modes
-  @test_util.run_deprecated_v1
   def testConcatSlice(self):
     r1 = test_ops.stub_resource_handle_op(container="a", shared_name="b")
     r2 = test_ops.stub_resource_handle_op(container="a", shared_name="c")
@@ -1510,15 +1512,13 @@ class PadTest(test_util.TensorFlowTestCase):
 
 class InvertPermutationTest(test_util.TensorFlowTestCase):
 
-  @test_util.run_deprecated_v1
   def testInvertPermutation(self):
     for dtype in [dtypes.int32, dtypes.int64]:
-      with self.subTest(dtype=dtype):
-        with self.cached_session(use_gpu=True):
-          x = constant_op.constant([3, 4, 0, 2, 1], dtype=dtype)
-          y = array_ops.invert_permutation(x)
-          self.assertAllEqual(y.get_shape(), [5])
-          self.assertAllEqual(y, [2, 4, 3, 0, 1])
+      with self.subTest(dtype=dtype, use_gpu=True):
+        x = constant_op.constant([3, 4, 0, 2, 1], dtype=dtype)
+        y = array_ops.invert_permutation(x)
+        self.assertAllEqual(y.get_shape(), [5])
+        self.assertAllEqual(y, [2, 4, 3, 0, 1])
 
 
 class UnravelIndexTest(test_util.TensorFlowTestCase):
@@ -1557,50 +1557,43 @@ class UnravelIndexTest(test_util.TensorFlowTestCase):
 
 class GuaranteeConstOpTest(test_util.TensorFlowTestCase):
 
-  @test_util.run_deprecated_v1
   def testSimple(self):
-    with self.cached_session():
-      a = array_ops.constant(10)
-      guarantee_a = array_ops.guarantee_const(a)
-      self.assertEqual(10, self.evaluate(guarantee_a))
+    a = array_ops.constant(10)
+    guarantee_a = array_ops.guarantee_const(a)
+    self.assertEqual(10, self.evaluate(guarantee_a))
 
-  @test_util.run_deprecated_v1
   def testVariables(self):
-    with self.cached_session() as sess:
-      for use_resource in [False, True]:
-        with self.subTest(use_resource=use_resource):
-          a = variable_scope.get_variable(
-              "var_{}".format(use_resource), [],
-              initializer=init_ops.constant_initializer(10.0),
-              use_resource=use_resource)
-          guarantee_a = array_ops.guarantee_const(a)
-          self.evaluate(variables.global_variables_initializer())
-          self.assertEqual(10.0, self.evaluate(guarantee_a))
+    for use_resource in [False, True]:
+      with self.subTest(use_resource=use_resource):
+        a = variable_scope.get_variable(
+            "var_{}".format(use_resource), [],
+            initializer=init_ops.constant_initializer(10.0),
+            use_resource=use_resource)
+        guarantee_a = array_ops.guarantee_const(a)
+        self.evaluate(a.initializer)
+        self.assertEqual(10.0, self.evaluate(guarantee_a))
 
-  @test_util.run_deprecated_v1
   def testResourceRejection(self):
-    with self.cached_session() as sess:
+    with ops.device("/cpu:0"):
       a = variable_scope.get_variable(
           "resource_var", [],
           initializer=init_ops.constant_initializer(10.0),
           use_resource=True)
+    with self.assertRaisesWithPredicateMatch(errors.InvalidArgumentError,
+                                             "cannot be a resource variable"):
       guarantee_a = array_ops.guarantee_const(a.handle)
-      self.evaluate(variables.global_variables_initializer())
-      with self.assertRaisesWithPredicateMatch(errors.InvalidArgumentError,
-                                               "cannot be a resource variable"):
-        self.evaluate(guarantee_a)
+      self.evaluate(a.initializer)
+      self.evaluate(guarantee_a)
 
 
 class SnapshotOpTest(test_util.TensorFlowTestCase):
 
-  @test_util.run_deprecated_v1
   def testInvertPermutation(self):
     for dtype in [dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64]:
-      with self.subTest(dtype=dtype):
-        with self.cached_session(use_gpu=True):
-          x = constant_op.constant([0, 1, 2, 3], dtype=dtype)
-          y = gen_array_ops.snapshot(x)
-          self.assertAllEqual(y, [0, 1, 2, 3])
+      with self.subTest(dtype=dtype, use_gpu=True):
+        x = constant_op.constant([0, 1, 2, 3], dtype=dtype)
+        y = gen_array_ops.snapshot(x)
+        self.assertAllEqual(y, [0, 1, 2, 3])
 
 
 @test_util.run_all_in_graph_and_eager_modes
@@ -2059,7 +2052,6 @@ class BatchGatherNdTest(test_util.TensorFlowTestCase):
     with self.assertRaises(ValueError):
       array_ops.batch_gather_nd(params=params, indices=indices, batch_dims=4)
 
-  @test_util.run_deprecated_v1
   def testNoneBatchDimensions(self):
     """Tests gather_nd works with None dimensions."""
     shapes = []
@@ -2086,19 +2078,18 @@ class BatchGatherNdTest(test_util.TensorFlowTestCase):
         params_ph_shape[i] = None
         indices_ph_shape[i] = None
 
-      params = array_ops.placeholder(dtypes.float32, shape=params_ph_shape)
-      indices = array_ops.placeholder(dtypes.int32, shape=indices_ph_shape)
-      out = array_ops.batch_gather_nd(
-          params=params, indices=indices, batch_dims=batch_dims)
+      @def_function.function
+      def func(params, indices):
+        return array_ops.batch_gather_nd(
+            params=params, indices=indices, batch_dims=batch_dims)  # pylint: disable=cell-var-from-loop
 
-      with self.cached_session() as sess:
-        params_val = np.ones(dtype=np.float32, shape=params_shape)
-        indices_val = np.ones(dtype=np.int32, shape=indices_shape)
-        res = sess.run(
-            out, feed_dict={
-                params: params_val,
-                indices: indices_val
-            })
+      f = func.get_concrete_function(
+          tensor_spec.TensorSpec(params_ph_shape, dtypes.float32),
+          tensor_spec.TensorSpec(indices_ph_shape, dtypes.int32))
+
+      params_val = np.ones(dtype=np.float32, shape=params_shape)
+      indices_val = np.ones(dtype=np.int32, shape=indices_shape)
+      res = f(params_val, indices_val)
       row_ndims = len(params_shape) - batch_dims - indices_shape[-1]
       expected_out_shape = indices_shape[:-1]
       if row_ndims > 0:
@@ -2106,16 +2097,6 @@ class BatchGatherNdTest(test_util.TensorFlowTestCase):
 
       self.assertSequenceEqual(res.shape, expected_out_shape)
 
-  @test_util.run_deprecated_v1
-  def testUnknownIndices(self):
-    """Tests whether indices with unknown rank works correctly."""
-    params = constant_op.constant(((0, 1, 2),))
-    indices = array_ops.placeholder(dtypes.int32)
-    gather_nd_t = array_ops.gather_nd(params, indices, batch_dims=1)
-    shape = gather_nd_t.get_shape()
-    self.assertEqual(None, shape.ndims)
-    self.assertEqual(None, tensor_shape.dimension_value(shape[0]))
-
 
 @test_util.run_all_in_graph_and_eager_modes
 class RepeatTest(test_util.TensorFlowTestCase, parameterized.TestCase):
diff --git a/tensorflow/python/kernel_tests/v1_compat_tests/BUILD b/tensorflow/python/kernel_tests/v1_compat_tests/BUILD
index bd9c02d8101..e5512eb133e 100644
--- a/tensorflow/python/kernel_tests/v1_compat_tests/BUILD
+++ b/tensorflow/python/kernel_tests/v1_compat_tests/BUILD
@@ -42,3 +42,15 @@ cuda_py_test(
         "//tensorflow/python:session_ops",
     ],
 )
+
+cuda_py_test(
+    name = "array_ops_test",
+    size = "small",
+    srcs = ["array_ops_test.py"],
+    deps = [
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:framework",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:framework_test_lib",
+    ],
+)
diff --git a/tensorflow/python/kernel_tests/v1_compat_tests/array_ops_test.py b/tensorflow/python/kernel_tests/v1_compat_tests/array_ops_test.py
new file mode 100644
index 00000000000..2203c4b0723
--- /dev/null
+++ b/tensorflow/python/kernel_tests/v1_compat_tests/array_ops_test.py
@@ -0,0 +1,88 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for array_ops that only work in V1."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.platform import test as test_lib
+
+
+class ReverseV2Test(test_util.TensorFlowTestCase):
+
+  # Pure shape inference test only V1
+  @test_util.run_deprecated_v1
+  def testUnknownDims(self):
+    reverse_v2 = array_ops.reverse_v2
+    data_t = array_ops.placeholder(dtypes.float32)
+    axis_known_t = array_ops.placeholder(dtypes.int32, shape=[3])
+    reverse_known_t = reverse_v2(data_t, axis_known_t)
+    # Unlike V1 we cannot know this anymore
+    self.assertIsNone(reverse_known_t.get_shape().ndims)
+
+    axis_unknown_t = array_ops.placeholder(dtypes.int32)
+    reverse_unknown_t = reverse_v2(data_t, axis_unknown_t)
+    self.assertIs(None, reverse_unknown_t.get_shape().ndims)
+
+    data_2d_t = array_ops.placeholder(dtypes.float32, shape=[None, None])
+    axis_2d_t = array_ops.placeholder(dtypes.int32, shape=[3])
+    reverse_2d_t = reverse_v2(data_2d_t, axis_2d_t)
+    self.assertEqual(2, reverse_2d_t.get_shape().ndims)
+
+
+class SequenceMaskTest(test_util.TensorFlowTestCase):
+
+  # Pure shape inference test only V1
+  @test_util.run_deprecated_v1
+  def testUnknownShape(self):
+    lengths = array_ops.placeholder(dtype=dtypes.int32)
+    res = array_ops.sequence_mask(lengths)
+    self.assertEqual(res.shape, None)  # pylint: disable=g-generic-assert
+
+
+class BatchGatherNdTest(test_util.TensorFlowTestCase):
+
+  # Pure shape inference test only V1
+  @test_util.run_deprecated_v1
+  def testUnknownIndices(self):
+    """Tests whether indices with unknown rank works correctly."""
+    params = constant_op.constant(((0, 1, 2),))
+    indices = array_ops.placeholder(dtypes.int32)
+    gather_nd_t = array_ops.gather_nd(params, indices, batch_dims=1)
+    shape = gather_nd_t.get_shape()
+    self.assertIsNone(shape.ndims)
+    self.assertIsNone(tensor_shape.dimension_value(shape[0]))
+
+
+class SliceAssignTest(test_util.TensorFlowTestCase):
+
+  @test_util.run_v1_only("Variables need initialization only in V1")
+  def testUninitialized(self):
+    with self.assertRaisesRegex(
+        errors.FailedPreconditionError,
+        "Attempting to use uninitialized value Variable"):
+      v = variables.VariableV1([1, 2])
+      self.evaluate(v[:].assign([1, 2]))
+
+
+if __name__ == "__main__":
+  test_lib.main()

From 3727302f03a495b665efda24a845c4b30a7cffe9 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Sun, 13 Dec 2020 10:18:02 -0800
Subject: [PATCH 27/60] Removing run_deprecated_v1 and run_v1_only tags on
 tests for array_ops

PiperOrigin-RevId: 347267934
Change-Id: Id8d1edfa7ff51c1ba3d9fc496d51a981a19c2ba9
---
 .../python/kernel_tests/array_ops_test.py     | 953 +++++++++---------
 .../python/kernel_tests/v1_compat_tests/BUILD |  12 -
 .../v1_compat_tests/array_ops_test.py         |  88 --
 3 files changed, 486 insertions(+), 567 deletions(-)
 delete mode 100644 tensorflow/python/kernel_tests/v1_compat_tests/array_ops_test.py

diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py
index 0e8548ce5cf..006737f95d7 100644
--- a/tensorflow/python/kernel_tests/array_ops_test.py
+++ b/tensorflow/python/kernel_tests/array_ops_test.py
@@ -24,11 +24,11 @@ import unittest
 from absl.testing import parameterized
 import numpy as np
 
+from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.client import session
 from tensorflow.python.eager import backprop
 from tensorflow.python.eager import context
 from tensorflow.python.eager import def_function
-from tensorflow.python.framework import config
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -42,6 +42,7 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.ops import gradient_checker_v2
+from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import list_ops
 from tensorflow.python.ops import map_fn
@@ -141,52 +142,60 @@ class BooleanMaskTest(test_util.TensorFlowTestCase):
       masked_arr = arr[:, mask]
     elif axis == 2:
       masked_arr = arr[:, :, mask]
-    masked_tensor = array_ops.boolean_mask(arr, mask, axis=axis)
+    with self.cached_session():
+      masked_tensor = array_ops.boolean_mask(arr, mask, axis=axis)
 
-    # Leading dimension size of masked_tensor is always unknown until runtime
-    # since we don't how many elements will be kept.
-    leading = 1 if axis is None else axis + 1
-    self.assertAllEqual(masked_tensor.get_shape()[leading:],
-                        masked_arr.shape[leading:])
+      # Leading dimension size of masked_tensor is always unknown until runtime
+      # since we don't how many elements will be kept.
+      leading = 1 if axis is None else axis + 1
+      self.assertAllEqual(masked_tensor.get_shape()[leading:],
+                          masked_arr.shape[leading:])
 
-    self.assertAllClose(masked_arr, masked_tensor)
+      self.assertAllClose(masked_arr, masked_tensor)
 
+  @test_util.run_deprecated_v1
   def testMaskDim1ArrDim2Axis1(self):
     ndims_mask = 1
     for arr_shape in [(1, 1), (2, 2), (2, 5)]:
       with self.subTest(arr_shape=arr_shape):
         self.CheckVersusNumpy(ndims_mask, arr_shape, axis=1)
 
+  @test_util.run_deprecated_v1
   def testMaskDim2ArrDim2Axis1(self):
     ndims_mask = 2
     for arr_shape in [(1, 1), (2, 2), (2, 5)]:
       with self.subTest(arr_shape=arr_shape):
         self.CheckVersusNumpy(ndims_mask, arr_shape, axis=1)
 
+  @test_util.run_deprecated_v1
   def testMaskDim1ArrDim1(self):
     ndims_mask = 1
     for arr_shape in [(1,), (2,), (3,), (10,)]:
       with self.subTest(arr_shape=arr_shape):
         self.CheckVersusNumpy(ndims_mask, arr_shape)
 
+  @test_util.run_deprecated_v1
   def testMaskDim1ArrDim2(self):
     ndims_mask = 1
     for arr_shape in [(1, 1), (2, 2), (2, 5)]:
       with self.subTest(arr_shape=arr_shape):
         self.CheckVersusNumpy(ndims_mask, arr_shape)
 
+  @test_util.run_deprecated_v1
   def testMaskDim2ArrDim2(self):
     ndims_mask = 2
     for arr_shape in [(1, 1), (2, 2), (2, 5)]:
       with self.subTest(arr_shape=arr_shape):
         self.CheckVersusNumpy(ndims_mask, arr_shape)
 
+  @test_util.run_deprecated_v1
   def testMaskDim2ArrDim3(self):
     ndims_mask = 2
     for arr_shape in [(1, 1, 1), (1, 2, 2), (2, 2, 1)]:
       with self.subTest(arr_shape=arr_shape):
         self.CheckVersusNumpy(ndims_mask, arr_shape)
 
+  @test_util.run_deprecated_v1
   def testEmptyInput2D(self):
     mask = np.array([True, False])
     arr = np.array([[], []]).astype(np.float32)
@@ -196,6 +205,7 @@ class BooleanMaskTest(test_util.TensorFlowTestCase):
     with self.cached_session():
       self.assertAllClose(numpy_result, tf_result)
 
+  @test_util.run_deprecated_v1
   def testEmptyInput1D(self):
     mask = np.array([]).astype(bool)
     arr = np.array([]).astype(np.float32)
@@ -205,6 +215,7 @@ class BooleanMaskTest(test_util.TensorFlowTestCase):
     with self.cached_session():
       self.assertAllClose(numpy_result, tf_result)
 
+  @test_util.run_deprecated_v1
   def testEmptyOutput(self):
     make_mask = lambda shape: np.zeros(shape, dtype=bool)
     for ndims_mask in range(1, 4):
@@ -214,68 +225,71 @@ class BooleanMaskTest(test_util.TensorFlowTestCase):
             arr_shape = np.random.randint(1, 5, size=ndims_arr)
             self.CheckVersusNumpy(ndims_mask, arr_shape, make_mask=make_mask)
 
+  @test_util.run_deprecated_v1
   def testWorksWithDimensionsEqualToNoneDuringGraphBuild(self):
     # The rank of the mask tensor must be specified. This is explained
     # in the docstring as well.
-    @def_function.function
-    def func(ph_tensor, ph_mask):
-      return array_ops.boolean_mask(ph_tensor, ph_mask)
+    with self.cached_session() as sess:
+      ph_tensor = array_ops.placeholder(dtypes.int32, shape=None)
+      ph_mask = array_ops.placeholder(dtypes.bool, shape=[None])
 
-    f = func.get_concrete_function(
-        tensor_spec.TensorSpec(None, dtypes.int32),
-        tensor_spec.TensorSpec([None], dtypes.bool))
-    arr = np.array([[1, 2], [3, 4]], np.int32)
-    mask = np.array([False, True])
-    masked_tensor = f(arr, mask)
-    self.assertAllEqual(masked_tensor, arr[mask])
+      arr = np.array([[1, 2], [3, 4]])
+      mask = np.array([False, True])
 
+      masked_tensor = sess.run(
+          array_ops.boolean_mask(ph_tensor, ph_mask),
+          feed_dict={
+              ph_tensor: arr,
+              ph_mask: mask
+          })
+      np.testing.assert_allclose(masked_tensor, arr[mask])
+
+  @test_util.run_deprecated_v1
   def testMaskDimensionsSetToNoneRaises(self):
     # The rank of the mask tensor must be specified. This is explained
     # in the docstring as well.
-    @def_function.function
-    def func(tensor, mask):
-      return array_ops.boolean_mask(tensor, mask)
-
-    with self.assertRaisesRegex(ValueError, "dimensions must be specified"):
-      _ = func.get_concrete_function(
-          tensor_spec.TensorSpec([None, 2], dtypes.int32),
-          tensor_spec.TensorSpec(None, dtypes.bool))
+    with self.cached_session():
+      tensor = array_ops.placeholder(dtypes.int32, shape=[None, 2])
+      mask = array_ops.placeholder(dtypes.bool, shape=None)
+      with self.assertRaisesRegex(ValueError, "dimensions must be specified"):
+        array_ops.boolean_mask(tensor, mask)
 
   def testMaskHasMoreDimsThanTensorRaises(self):
     mask = [[True, True], [False, False]]
     tensor = [1, 2, 3, 4]
     with self.cached_session():
       with self.assertRaisesRegex(ValueError, "incompatible"):
-        self.evaluate(array_ops.boolean_mask(tensor, mask))
+        array_ops.boolean_mask(tensor, mask).eval()
 
   def testMaskIsScalarRaises(self):
     mask = True
     tensor = 1
     with self.cached_session():
       with self.assertRaisesRegex(ValueError, "mask.*scalar"):
-        self.evaluate(array_ops.boolean_mask(tensor, mask))
+        array_ops.boolean_mask(tensor, mask).eval()
 
   def testMaskShapeDifferentThanFirstPartOfTensorShapeRaises(self):
     mask = [True, True, True]
     tensor = [[1, 2], [3, 4]]
     with self.cached_session():
       with self.assertRaisesRegex(ValueError, "incompatible"):
-        self.evaluate(array_ops.boolean_mask(tensor, mask))
+        array_ops.boolean_mask(tensor, mask).eval()
 
+  @test_util.run_deprecated_v1
   def testStringMask(self):
     # Reproduces b/111171330, where the optimized boolean_mask graph would
     # be incorrectly placed on GPU.
-    config.set_optimizer_experimental_options({"shape_optimization": True})
-
-    @def_function.function
-    def func(tile_input):
-      string_tensor = array_ops.tile([["hello"]], tile_input)
-      bool_tensor = array_ops.tile([[True]], tile_input)
+    with ops.Graph().as_default():
+      tile_placeholder = array_ops.placeholder(dtypes.int32, [2])
+      string_tensor = array_ops.tile([["hello"]], tile_placeholder)
+      bool_tensor = array_ops.tile([[True]], tile_placeholder)
       masked_tensor = array_ops.boolean_mask(string_tensor, bool_tensor)
-      return masked_tensor
-
-    result = func([2, 2])
-    self.assertAllEqual([b"hello", b"hello", b"hello", b"hello"], result)
+      config = config_pb2.ConfigProto()
+      config.graph_options.rewrite_options.shape_optimization = 1
+      config.gpu_options.per_process_gpu_memory_fraction = 0.3
+      with session.Session(config=config) as sess:
+        result = sess.run(masked_tensor, feed_dict={tile_placeholder: [2, 2]})
+        self.assertAllEqual([b"hello", b"hello", b"hello", b"hello"], result)
 
   def testMaskWithAxisTensor(self):
 
@@ -336,12 +350,13 @@ class OperatorShapeTest(test_util.TensorFlowTestCase):
 
 class ReverseV2Test(test_util.TensorFlowTestCase):
 
+  @test_util.run_deprecated_v1
   def testReverse0DimAuto(self):
     x_np = 4
     for use_gpu in [False, True]:
       with self.subTest(use_gpu=use_gpu):
         with self.cached_session(use_gpu=use_gpu):
-          x_tf = self.evaluate(array_ops.reverse_v2(x_np, []))
+          x_tf = array_ops.reverse_v2(x_np, []).eval()
           self.assertAllEqual(x_tf, x_np)
 
   def _reverse1DimAuto(self, np_dtype):
@@ -350,10 +365,10 @@ class ReverseV2Test(test_util.TensorFlowTestCase):
     for use_gpu in [False, True]:
       for axis_dtype in [dtypes.int32, dtypes.int64]:
         with self.subTest(use_gpu=use_gpu, axis_dtype=axis_dtype):
-          x_tf = self.evaluate(
-              array_ops.reverse_v2(x_np,
-                                   constant_op.constant([0], dtype=axis_dtype)))
-          self.assertAllEqual(x_tf, np.asarray(x_np)[::-1])
+          with self.cached_session(use_gpu=use_gpu):
+            x_tf = array_ops.reverse_v2(
+                x_np, constant_op.constant([0], dtype=axis_dtype)).eval()
+            self.assertAllEqual(x_tf, np.asarray(x_np)[::-1])
 
   def _reverse2DimAuto(self, np_dtype):
     x_np = np.array([[1, 200, 3], [4, 5, 60]], dtype=np_dtype)
@@ -363,34 +378,38 @@ class ReverseV2Test(test_util.TensorFlowTestCase):
         for axis_dtype in [dtypes.int32, dtypes.int64]:
           with self.subTest(
               reverse_f=reverse_f, use_gpu=use_gpu, axis_dtype=axis_dtype):
-            x_tf_1 = self.evaluate(
-                reverse_f(x_np, constant_op.constant([0], dtype=axis_dtype)))
-            x_tf_2 = self.evaluate(
-                reverse_f(x_np, constant_op.constant([-2], dtype=axis_dtype)))
-            x_tf_3 = self.evaluate(
-                reverse_f(x_np, constant_op.constant([1], dtype=axis_dtype)))
-            x_tf_4 = self.evaluate(
-                reverse_f(x_np, constant_op.constant([-1], dtype=axis_dtype)))
-            x_tf_5 = self.evaluate(
-                reverse_f(x_np, constant_op.constant([1, 0], dtype=axis_dtype)))
-            self.assertAllEqual(x_tf_1, np.asarray(x_np)[::-1, :])
-            self.assertAllEqual(x_tf_2, np.asarray(x_np)[::-1, :])
-            self.assertAllEqual(x_tf_3, np.asarray(x_np)[:, ::-1])
-            self.assertAllEqual(x_tf_4, np.asarray(x_np)[:, ::-1])
-            self.assertAllEqual(x_tf_5, np.asarray(x_np)[::-1, ::-1])
+            with self.cached_session(use_gpu=use_gpu):
+              x_tf_1 = reverse_f(x_np,
+                                 constant_op.constant([0],
+                                                      dtype=axis_dtype)).eval()
+              x_tf_2 = reverse_f(x_np,
+                                 constant_op.constant([-2],
+                                                      dtype=axis_dtype)).eval()
+              x_tf_3 = reverse_f(x_np,
+                                 constant_op.constant([1],
+                                                      dtype=axis_dtype)).eval()
+              x_tf_4 = reverse_f(x_np,
+                                 constant_op.constant([-1],
+                                                      dtype=axis_dtype)).eval()
+              x_tf_5 = reverse_f(x_np,
+                                 constant_op.constant([1, 0],
+                                                      dtype=axis_dtype)).eval()
+              self.assertAllEqual(x_tf_1, np.asarray(x_np)[::-1, :])
+              self.assertAllEqual(x_tf_2, np.asarray(x_np)[::-1, :])
+              self.assertAllEqual(x_tf_3, np.asarray(x_np)[:, ::-1])
+              self.assertAllEqual(x_tf_4, np.asarray(x_np)[:, ::-1])
+              self.assertAllEqual(x_tf_5, np.asarray(x_np)[::-1, ::-1])
 
   # This test covers the axis validation in the shape function
   # (no eval())
+  @test_util.run_deprecated_v1
   def testInvalidAxis(self):
     x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
-    with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError),
-                                "is out of valid range"):
+    with self.assertRaisesRegex(ValueError, "is out of valid range"):
       array_ops.reverse_v2(x_np, [-30])
-    with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError),
-                                "is out of valid range"):
+    with self.assertRaisesRegex(ValueError, "is out of valid range"):
       array_ops.reverse_v2(x_np, [2])
-    with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError),
-                                "axis 0 specified more than once"):
+    with self.assertRaisesRegex(ValueError, "axis 0 specified more than once"):
       array_ops.reverse_v2(x_np, [0, -2])
 
   # This is the version of reverse that uses axis indices rather than
@@ -399,24 +418,24 @@ class ReverseV2Test(test_util.TensorFlowTestCase):
   #
   # Note: this test passes placeholder as constant axis is validated
   # in shape function (see testInvalidAxis)
+  @test_util.run_deprecated_v1
   def testInvalid(self):
     x_np = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
+    axis = array_ops.placeholder(dtypes.int32)
+    with self.cached_session():
+      with self.assertRaisesRegex(errors_impl.InvalidArgumentError,
+                                  "is out of.*range"):
+        array_ops.reverse_v2(x_np, axis).eval(feed_dict={axis: [-30]})
+      with self.assertRaisesRegex(errors_impl.InvalidArgumentError,
+                                  "is out of.*range"):
+        array_ops.reverse_v2(x_np, axis).eval(feed_dict={axis: [2]})
+      with self.assertRaisesRegex(
+          errors_impl.InvalidArgumentError,
+          "(axis 0 specified more than once|canonicalized axis 0 was repeated.)"
+      ):
+        array_ops.reverse_v2(x_np, axis).eval(feed_dict={axis: [0, -2]})
 
-    @def_function.function
-    def func(ax):
-      return array_ops.reverse_v2(x_np, ax)
-
-    with self.assertRaisesRegex((ValueError, errors_impl.InvalidArgumentError),
-                                "is out of.*range"):
-      func([-30])
-    with self.assertRaisesRegex((ValueError, errors_impl.InvalidArgumentError),
-                                "is out of.*range"):
-      func([2])
-    with self.assertRaisesRegex(
-        (ValueError, errors_impl.InvalidArgumentError),
-        "(axis 0 specified more than once|canonicalized axis 0 was repeated.)"):
-      func([0, -2])
-
+  @test_util.run_deprecated_v1
   def testReverse1DimAuto(self):
     for dtype in [
         np.uint8, np.int8, np.uint16, np.int16, np.int32, np.int64, np.bool,
@@ -425,6 +444,7 @@ class ReverseV2Test(test_util.TensorFlowTestCase):
     ]:
       self._reverse1DimAuto(dtype)
 
+  @test_util.run_deprecated_v1
   def testReverse2DimAuto(self):
     for dtype in [
         np.uint8, np.int8, np.uint16, np.int16, np.int32, np.int64, np.bool,
@@ -433,54 +453,75 @@ class ReverseV2Test(test_util.TensorFlowTestCase):
     ]:
       self._reverse2DimAuto(dtype)
 
+  @test_util.run_deprecated_v1
+  def testUnknownDims(self):
+    reverse_v2 = array_ops.reverse_v2
+    data_t = array_ops.placeholder(dtypes.float32)
+    axis_known_t = array_ops.placeholder(dtypes.int32, shape=[3])
+    reverse_known_t = reverse_v2(data_t, axis_known_t)
+    # Unlike V1 we cannot know this anymore
+    self.assertEqual(None, reverse_known_t.get_shape().ndims)
+
+    axis_unknown_t = array_ops.placeholder(dtypes.int32)
+    reverse_unknown_t = reverse_v2(data_t, axis_unknown_t)
+    self.assertIs(None, reverse_unknown_t.get_shape().ndims)
+
+    data_2d_t = array_ops.placeholder(dtypes.float32, shape=[None, None])
+    axis_2d_t = array_ops.placeholder(dtypes.int32, shape=[3])
+    reverse_2d_t = reverse_v2(data_2d_t, axis_2d_t)
+    self.assertEqual(2, reverse_2d_t.get_shape().ndims)
+
+  @test_util.run_deprecated_v1
   def testReverseRowsOf3Channels(self):
     """Tests optimized code for reversing rows with last dim size = 3."""
-    for reverse_f in [array_ops.reverse_v2, array_ops.reverse]:
-      for outer_size in (1, 2):
-        for middle_size in list(range(50)) + [100000]:
-          with self.subTest(
-              reverse_f=reverse_f,
-              outer_size=outer_size,
-              middle_size=middle_size,
-              use_gpu=True):
-            x_np = np.reshape(
-                np.arange(outer_size * middle_size * 3, dtype=np.float32),
-                newshape=(outer_size, middle_size, 3))
-            x_tf = self.evaluate(reverse_f(x_np, [1]))
-            np_answer = x_np[:, ::-1, :]
-            self.assertAllEqual(x_tf, np_answer)
+    with self.session(use_gpu=True):
+      for reverse_f in [array_ops.reverse_v2, array_ops.reverse]:
+        for outer_size in (1, 2):
+          for middle_size in list(range(50)) + [100000]:
+            with self.subTest(
+                reverse_f=reverse_f,
+                outer_size=outer_size,
+                middle_size=middle_size):
+              x_np = np.reshape(
+                  np.arange(outer_size * middle_size * 3, dtype=np.float32),
+                  newshape=(outer_size, middle_size, 3))
+              x_tf = reverse_f(x_np, [1]).eval()
+              np_answer = x_np[:, ::-1, :]
+              self.assertAllEqual(x_tf, np_answer)
 
+  @test_util.run_deprecated_v1
   def testReverseRowsOf4Channels(self):
-    for reverse_f in [array_ops.reverse_v2, array_ops.reverse]:
-      for outer_size in (1, 2):
-        for middle_size in list(range(50)) + [100000]:
-          with self.subTest(
-              reverse_f=reverse_f,
-              outer_size=outer_size,
-              middle_size=middle_size,
-              use_gpu=True):
-            x_np = np.reshape(
-                np.arange(outer_size * middle_size * 4, dtype=np.float32),
-                newshape=(outer_size, middle_size, 4))
-            x_tf = self.evaluate(reverse_f(x_np, [1]))
-            np_answer = x_np[:, ::-1, :]
-            self.assertAllEqual(x_tf, np_answer)
+    with self.session(use_gpu=True):
+      for reverse_f in [array_ops.reverse_v2, array_ops.reverse]:
+        for outer_size in (1, 2):
+          for middle_size in list(range(50)) + [100000]:
+            with self.subTest(
+                reverse_f=reverse_f,
+                outer_size=outer_size,
+                middle_size=middle_size):
+              x_np = np.reshape(
+                  np.arange(outer_size * middle_size * 4, dtype=np.float32),
+                  newshape=(outer_size, middle_size, 4))
+              x_tf = reverse_f(x_np, [1]).eval()
+              np_answer = x_np[:, ::-1, :]
+              self.assertAllEqual(x_tf, np_answer)
 
+  @test_util.run_deprecated_v1
   def testReverseColumnsOf3Channels(self):
-    for reverse_f in [array_ops.reverse_v2, array_ops.reverse]:
-      for outer_size in list(range(50)) + [100000]:
-        for middle_size in (1, 2):
-          with self.subTest(
-              reverse_f=reverse_f,
-              outer_size=outer_size,
-              middle_size=middle_size,
-              use_gpu=True):
-            x_np = np.reshape(
-                np.arange(outer_size * middle_size * 3, dtype=np.float32),
-                newshape=(outer_size, middle_size, 3))
-            x_tf = self.evaluate(reverse_f(x_np, [0]))
-            np_answer = x_np[::-1, :, :]
-            self.assertAllEqual(x_tf, np_answer)
+    with self.session(use_gpu=True):
+      for reverse_f in [array_ops.reverse_v2, array_ops.reverse]:
+        for outer_size in list(range(50)) + [100000]:
+          for middle_size in (1, 2):
+            with self.subTest(
+                reverse_f=reverse_f,
+                outer_size=outer_size,
+                middle_size=middle_size):
+              x_np = np.reshape(
+                  np.arange(outer_size * middle_size * 3, dtype=np.float32),
+                  newshape=(outer_size, middle_size, 3))
+              x_tf = reverse_f(x_np, [0]).eval()
+              np_answer = x_np[::-1, :, :]
+              self.assertAllEqual(x_tf, np_answer)
 
   def testReverseInvalidShape(self):
     x = np.ndarray(shape=[0, 1, 1])
@@ -507,11 +548,12 @@ class MeshgridTest(test_util.TensorFlowTestCase):
           x += 1j
         inputs.append(x)
       numpy_out = np.meshgrid(*inputs, indexing=index)
-      with test_util.device(use_gpu=use_gpu):
+      with self.cached_session(use_gpu=use_gpu):
         tf_out = array_ops.meshgrid(*inputs, indexing=index)
         for x_np, x_tf in zip(numpy_out, tf_out):
           self.assertAllEqual(x_np, x_tf)
 
+  @test_util.run_deprecated_v1
   def testCompare(self):
     for t in (np.float16, np.float32, np.float64, np.int32, np.int64,
               np.complex64, np.complex128):
@@ -550,15 +592,15 @@ class StridedSliceChecker(object):
 
     def eval_if_tensor(x):
       try:
-        return self.test.evaluate(x)
-      except (AttributeError, TypeError, ValueError):
+        return x.eval()
+      except AttributeError:
         return x
 
     if isinstance(spec, bool) or \
       (isinstance(spec, ops.Tensor) and spec.dtype == dtypes.bool) or \
       (isinstance(spec, np.ndarray) and spec.dtype == bool) or \
       (isinstance(spec, (list, tuple)) and np.asarray(spec).dtype == bool):
-      tensor = self.test.evaluate(op)
+      tensor = op.eval()
       np_spec = eval_if_tensor(spec)
       self.test.assertAllEqual(self.x_np[np_spec], tensor)
       return tensor
@@ -566,7 +608,7 @@ class StridedSliceChecker(object):
     if not isinstance(spec, (list, tuple)):
       spec = [spec]
 
-    tensor = self.test.evaluate(op)
+    tensor = op.eval()
 
     # Make a numpy spec that pre-evals the tensors
     np_specs = []
@@ -595,32 +637,34 @@ STRIDED_SLICE_TYPES = [
 class StridedSliceTest(test_util.TensorFlowTestCase):
   """Test the strided slice operation with variants of slices."""
 
+  @test_util.run_deprecated_v1
   def test_basic_slice(self):
     for tensor_type in STRIDED_SLICE_TYPES:
-      with self.subTest(tensor_type=tensor_type, use_gpu=True):
-        checker = StridedSliceChecker(
-            self, StridedSliceChecker.REF_TENSOR, tensor_type=tensor_type)
-        _ = checker[:, :, :]
-        # Various ways of representing identity slice
-        _ = checker[:, :, :]
-        _ = checker[::, ::, ::]
-        _ = checker[::1, ::1, ::1]
-        # Not zero slice
-        _ = checker[::1, ::5, ::2]
-        # Reverse in each dimension independently
-        _ = checker[::-1, :, :]
-        _ = checker[:, ::-1, :]
-        _ = checker[:, :, ::-1]
-        ## negative index tests i.e. n-2 in first component
-        _ = checker[-2::-1, :, ::1]
-        # negative index tests i.e. n-2 in first component, non-unit stride
-        _ = checker[-2::-1, :, ::2]
+      with self.subTest(tensor_type=tensor_type):
+        with self.cached_session(use_gpu=True):
+          checker = StridedSliceChecker(
+              self, StridedSliceChecker.REF_TENSOR, tensor_type=tensor_type)
+          _ = checker[:, :, :]
+          # Various ways of representing identity slice
+          _ = checker[:, :, :]
+          _ = checker[::, ::, ::]
+          _ = checker[::1, ::1, ::1]
+          # Not zero slice
+          _ = checker[::1, ::5, ::2]
+          # Reverse in each dimension independently
+          _ = checker[::-1, :, :]
+          _ = checker[:, ::-1, :]
+          _ = checker[:, :, ::-1]
+          ## negative index tests i.e. n-2 in first component
+          _ = checker[-2::-1, :, ::1]
+          # negative index tests i.e. n-2 in first component, non-unit stride
+          _ = checker[-2::-1, :, ::2]
 
-        # Check rank-0 examples
-        checker2 = StridedSliceChecker(self, 5, tensor_type=tensor_type)
-        _ = checker2[None]
-        _ = checker2[...]
-        _ = checker2[tuple()]
+          # Check rank-0 examples
+          checker2 = StridedSliceChecker(self, 5, tensor_type=tensor_type)
+          _ = checker2[None]
+          _ = checker2[...]
+          _ = checker2[tuple()]
 
   def testInt64GPU(self):
     if not test_util.is_gpu_available():
@@ -650,8 +694,9 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
       v = variables.Variable([1., 2.])
       v[0]  # pylint: disable=pointless-statement
 
+  @test_util.run_deprecated_v1
   def testDegenerateSlices(self):
-    with test_util.device(use_gpu=True):
+    with self.session(use_gpu=True):
       checker = StridedSliceChecker(self, StridedSliceChecker.REF_TENSOR)
       # degenerate by offering a forward interval with a negative stride
       _ = checker[0:-1:-1, :, :]
@@ -664,13 +709,15 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
                                     StridedSliceChecker.REF_TENSOR_ALIGNED)
       _ = checker[1:0]
 
+  @test_util.run_deprecated_v1
   def testSliceWithUndefinedDimension(self):
     t = constant_op.constant([1, 2, 3])
     d = tensor_shape.Dimension(None)
     self.assertAllEqual(t[d:d:d], t)
 
+  @test_util.run_deprecated_v1
   def testEllipsis(self):
-    with test_util.device(use_gpu=True):
+    with self.session(use_gpu=True):
       raw = [[[[[1, 2], [3, 4], [5, 6]]], [[[7, 8], [9, 10], [11, 12]]]]]
       checker = StridedSliceChecker(self, raw)
 
@@ -686,12 +733,12 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
       # ellipsis at middle
       _ = checker[0:1, ..., 0:1]
       # multiple ellipses not allowed
-      with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError),
-                                  "Multiple ellipses"):
+      with self.assertRaisesRegex(ValueError, "Multiple ellipses"):
         _ = checker[..., :, ...].eval()
 
+  @test_util.run_deprecated_v1
   def testShrink(self):
-    with test_util.device(use_gpu=True):
+    with self.session(use_gpu=True):
       raw = [[[[[1, 2, 4, 5], [5, 6, 7, 8], [9, 10, 11, 12]]],
               [[[13, 14, 15, 16], [17, 18, 19, 20], [21, 22, 23, 24]]]]]
       checker = StridedSliceChecker(self, raw)
@@ -700,21 +747,17 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
       _ = checker[:, 0]
       _ = checker[:, :, 0]
 
+  @test_util.run_deprecated_v1
   def testBothNewAxisAndShrink(self):
-    with test_util.device(use_gpu=True):
-
-      @def_function.function
-      def func(inp):
-        return inp[array_ops.newaxis, :, 0]
-
-      f = func.get_concrete_function(
-          tensor_spec.TensorSpec([2, 2], dtypes.int16))
-
-      ones = constant_op.constant([[1, 1], [1, 1]], dtypes.int16)
-      self.assertAllEqual([[1, 1]], self.evaluate(f(ones)))
+    with self.session(use_gpu=True):
+      ones = array_ops.placeholder(shape=[2, 2], dtype=dtypes.int16)
+      self.assertAllEqual(
+          ones[array_ops.newaxis, :,
+               0].eval(feed_dict={ones: [[1, 1], [1, 1]]}), [[1, 1]])
 
+  @test_util.run_deprecated_v1
   def testTensorIndexing(self):
-    with test_util.device(use_gpu=True):
+    with self.session(use_gpu=True):
       raw = [[[[[1, 2, 4, 5], [5, 6, 7, 8], [9, 10, 11, 12]]],
               [[[13, 14, 15, 16], [17, 18, 19, 20], [21, 22, 23, 24]]]]]
       checker = StridedSliceChecker(self, raw, check_type_infer=False)
@@ -742,8 +785,9 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
       with self.assertRaisesRegex(TypeError, expected):
         _ = checker[[2.1, -0.7, 1.5]]
 
+  @test_util.run_deprecated_v1
   def testExpand(self):
-    with test_util.device(use_gpu=True):
+    with self.session(use_gpu=True):
       raw = [[[[[1, 2, 4, 5], [5, 6, 7, 8], [9, 10, 11, 12]]],
               [[[13, 14, 15, 16], [17, 18, 19, 20], [21, 22, 23, 24]]]]]
       checker = StridedSliceChecker(self, raw)
@@ -759,16 +803,18 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
       # Ellipsis in middle of two newaxis
       _ = checker[np.newaxis, ..., np.newaxis]
 
+  @test_util.run_deprecated_v1
   def testExpandVariable(self):
-    with test_util.device(use_gpu=True):
+    with self.session(use_gpu=True):
       x = variables.Variable(7, dtype=dtypes.int32)
       self.evaluate(x.initializer)
-      y = self.evaluate(x[None])
+      y = x[None].eval()
       self.assertEqual(y.shape, (1,))
       self.assertAllEqual(y, (7,))
 
+  @test_util.run_deprecated_v1
   def testOptimizedCases(self):
-    with test_util.device(use_gpu=True):
+    with self.session(use_gpu=True):
       checker = StridedSliceChecker(self,
                                     StridedSliceChecker.REF_TENSOR_ALIGNED)
       # Identity
@@ -782,8 +828,9 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
       # First axis slice
       _ = checker[np.newaxis, 1:]
 
+  @test_util.run_v1_only("currently failing on v2")
   def testMasks(self):
-    with test_util.device(use_gpu=True):
+    with self.session(use_gpu=True):
       scalar = np.array(0)
       # Test tensor type mask
       checker = StridedSliceChecker(self, StridedSliceChecker.REF_TENSOR)
@@ -808,225 +855,134 @@ class StridedSliceTest(test_util.TensorFlowTestCase):
       _ = checker2[ops.convert_to_tensor(mask)]
 
 
+class StridedSliceShapeChecker(object):
+
+  def __init__(self, x):
+    self.x = x
+
+  def __getitem__(self, spec):
+    op = self.x.__getitem__(spec)
+    return op.get_shape()
+
+
 class StridedSliceShapeTest(test_util.TensorFlowTestCase):
   """Test the shape inference of StridedSliceShapes."""
 
+  @test_util.run_deprecated_v1
   def testUnknown(self):
-    with test_util.device(use_gpu=True):
-
-      @def_function.function
-      def f(x):
-        y = x[...]
-        self.assertAllEqual(y.get_shape().ndims, None)
-
-      _ = f.get_concrete_function(tensor_spec.TensorSpec(None, dtypes.float32))
+    with self.session(use_gpu=True):
+      uncertain_tensor = array_ops.placeholder(dtypes.float32)
+      a = StridedSliceShapeChecker(uncertain_tensor)
+      a_slice_shape = a[...]
+      self.assertAllEqual(a_slice_shape.ndims, None)
 
   def tensorShapeEqual(self, x, y):
     self.assertTrue(x is not None and y is not None or x is None and y is None)
     self.assertEqual(x.as_list(), y.as_list())
 
+  @test_util.run_deprecated_v1
   def testTensorShapeUncertain(self):
-    with test_util.device(use_gpu=True):
-
-      @def_function.function
-      def f1(x):
-        y = x[3:5]
-        self.tensorShapeEqual(y.get_shape(),
-                              tensor_shape.TensorShape([2, None, 7]))
-
-      _ = f1.get_concrete_function(
-          tensor_spec.TensorSpec((5, None, 7), dtypes.float32))
-
-      @def_function.function
-      def f2(x):
-        y = x[3:5, :, 4]
-        self.tensorShapeEqual(y.get_shape(), tensor_shape.TensorShape([2,
-                                                                       None]))
-
-      _ = f2.get_concrete_function(
-          tensor_spec.TensorSpec((5, None, 7), dtypes.float32))
-
-      @def_function.function
-      def f3(x):
-        y = x[3:5, 3:4, 4]
-        self.tensorShapeEqual(y.get_shape(), tensor_shape.TensorShape([2,
-                                                                       None]))
-
-      _ = f3.get_concrete_function(
-          tensor_spec.TensorSpec((5, None, 7), dtypes.float32))
-
-      @def_function.function
-      def f4(x):
-        y = x[3:5, :, 5:10]
-        self.tensorShapeEqual(y.get_shape(),
-                              tensor_shape.TensorShape([2, None, 2]))
-
-      _ = f4.get_concrete_function(
-          tensor_spec.TensorSpec((5, None, 7), dtypes.float32))
-
-      @def_function.function
-      def f5(x):
-        y = x[3:5, :, 50:3]
-        self.tensorShapeEqual(y.get_shape(),
-                              tensor_shape.TensorShape([2, None, 0]))
-
-      _ = f5.get_concrete_function(
-          tensor_spec.TensorSpec((5, None, 7), dtypes.float32))
-
-      @def_function.function
-      def f6(x):
-        y = x[3:5, :, array_ops.newaxis, 50:3,]
-        self.tensorShapeEqual(y.get_shape(),
-                              tensor_shape.TensorShape([2, None, 1, 0]))
-
-      _ = f6.get_concrete_function(
-          tensor_spec.TensorSpec((5, None, 7), dtypes.float32))
-
-      @def_function.function
-      def f7(x):
-        y = x[1:5:2, :, array_ops.newaxis, 50:3,]
-        self.tensorShapeEqual(y.get_shape(),
-                              tensor_shape.TensorShape([2, None, 1, 0]))
-
-      _ = f7.get_concrete_function(
-          tensor_spec.TensorSpec((5, None, 7), dtypes.float32))
-
-      @def_function.function
-      def f8(x):
-        y = x[:5:3, :, array_ops.newaxis, 50:3,]
-        self.tensorShapeEqual(y.get_shape(),
-                              tensor_shape.TensorShape([2, None, 1, 0]))
-
-      _ = f8.get_concrete_function(
-          tensor_spec.TensorSpec((5, None, 7), dtypes.float32))
-
-      @def_function.function
-      def f9(x):
-        y = x[:2:3, :, array_ops.newaxis, 50:3,]
-        self.tensorShapeEqual(y.get_shape(),
-                              tensor_shape.TensorShape([1, None, 1, 0]))
-
-      _ = f9.get_concrete_function(
-          tensor_spec.TensorSpec((5, None, 7), dtypes.float32))
-
-      @def_function.function
-      def f10(x):
-        y = x[::-1, :, array_ops.newaxis, ::-2]
-        self.tensorShapeEqual(y.get_shape(),
-                              tensor_shape.TensorShape([5, None, 1, 4]))
-
-      _ = f10.get_concrete_function(
-          tensor_spec.TensorSpec((5, None, 7), dtypes.float32))
+    with self.session(use_gpu=True):
+      uncertain_tensor = array_ops.placeholder(
+          dtypes.float32, shape=(5, None, 7))
+      a = StridedSliceShapeChecker(uncertain_tensor)
+      self.tensorShapeEqual(a[3:5], tensor_shape.TensorShape([2, None, 7]))
+      self.tensorShapeEqual(a[3:5, :, 4], tensor_shape.TensorShape([2, None]))
+      self.tensorShapeEqual(a[3:5, 3:4, 4], tensor_shape.TensorShape([2, None]))
+      self.tensorShapeEqual(a[3:5, :, 5:10],
+                            tensor_shape.TensorShape([2, None, 2]))
+      self.tensorShapeEqual(a[3:5, :, 50:3],
+                            tensor_shape.TensorShape([2, None, 0]))
+      self.tensorShapeEqual(a[3:5, :, array_ops.newaxis, 50:3,],
+                            tensor_shape.TensorShape([2, None, 1, 0]))
+      self.tensorShapeEqual(a[1:5:2, :, array_ops.newaxis, 50:3,],
+                            tensor_shape.TensorShape([2, None, 1, 0]))
+      self.tensorShapeEqual(a[:5:3, :, array_ops.newaxis, 50:3,],
+                            tensor_shape.TensorShape([2, None, 1, 0]))
+      self.tensorShapeEqual(a[:2:3, :, array_ops.newaxis, 50:3,],
+                            tensor_shape.TensorShape([1, None, 1, 0]))
+      self.tensorShapeEqual(a[::-1, :, array_ops.newaxis, ::-2],
+                            tensor_shape.TensorShape([5, None, 1, 4]))
 
+  @test_util.run_deprecated_v1
   def testTensorValuedIndexShape(self):
     with self.session(use_gpu=True):
-
-      @def_function.function
-      def f1(x, y):
-        z = x[y]
-        self.tensorShapeEqual(z.get_shape(), tensor_shape.TensorShape([3, 7]))
-
-      _ = f1.get_concrete_function(
-          tensor_spec.TensorSpec((5, 3, 7)),
-          tensor_spec.TensorSpec((), dtypes.int32))
-
-      @def_function.function
-      def f2(x, y):
-        z = x[y, ::-1]
-        self.tensorShapeEqual(z.get_shape(), tensor_shape.TensorShape([3, 7]))
-
-      _ = f2.get_concrete_function(
-          tensor_spec.TensorSpec((5, 3, 7)),
-          tensor_spec.TensorSpec((), dtypes.int32))
-
-      @def_function.function
-      def f3(x, y):
-        z = x[y, ::-2]
-        self.tensorShapeEqual(z.get_shape(), tensor_shape.TensorShape([2, 7]))
-
-      _ = f3.get_concrete_function(
-          tensor_spec.TensorSpec((5, 3, 7)),
-          tensor_spec.TensorSpec((), dtypes.int32))
-
-      @def_function.function
-      def f4(x, y, s):
-        z = x[y, s:2]
-        self.tensorShapeEqual(z.get_shape(), tensor_shape.TensorShape([None,
-                                                                       7]))
-
-      _ = f4.get_concrete_function(
-          tensor_spec.TensorSpec((5, 3, 7)),
-          tensor_spec.TensorSpec((), dtypes.int32),
-          tensor_spec.TensorSpec((), dtypes.int32))
+      defined_shape_tensor = array_ops.placeholder(
+          dtypes.float32, shape=(5, 3, 7))
+      index_value = array_ops.placeholder(dtypes.int32, shape=())
+      a = StridedSliceShapeChecker(defined_shape_tensor)
+      self.tensorShapeEqual(a[index_value], tensor_shape.TensorShape([3, 7]))
+      self.tensorShapeEqual(a[index_value, ::-1],
+                            tensor_shape.TensorShape([3, 7]))
+      self.tensorShapeEqual(a[index_value, ::-2],
+                            tensor_shape.TensorShape([2, 7]))
+      other_scalar = array_ops.placeholder(dtypes.int32, shape=())
+      self.tensorShapeEqual(a[index_value, other_scalar:2],
+                            tensor_shape.TensorShape([None, 7]))
 
 
 class GradSliceChecker(object):
   """Tests that we can compute a gradient for var^2."""
 
-  def __init__(self, test, var, varnp, use_tape):
+  def __init__(self, test, sess, var, varnp):
     self.test = test
+    self.sess = sess
+    self.val = var * var
     self.var = var
     self.varnp = varnp
-    self.use_tape = use_tape
 
   def __getitem__(self, spec):
-    with test_util.AbstractGradientTape(
-        use_tape=self.use_tape, persistent=True) as tape:
-      tape.watch(self.var)
-      val = self.var * self.var
-      slice_var = self.var[spec]
-      slice_val = val[spec]
+    slice_var = self.var[spec]
+    slice_val = self.val[spec]
 
-      # compute analytic 2nd derivative
-      analytic_grad2 = 2 * slice_val
+    # compute analytic 2nd derivative
+    analytic_grad2 = 2 * slice_val
 
-      dy = variables.Variable(
-          array_ops.ones_like(slice_var, dtype=dtypes.float32))
-      assign = dy.assign(slice_var)
-
-      slice_val_grad = tape.gradient(slice_val, self.var, [dy])
-      slice_val_grad2 = tape.gradient(slice_val_grad, dy, [self.var])
-    self.test.evaluate(assign)
+    dy = variables.Variable(
+        array_ops.ones_like(slice_var, dtype=dtypes.float32))
+    assign = dy.assign(slice_var)
+    slice_val_grad, = gradients_impl.gradients(slice_val, self.var, grad_ys=dy)
+    slice_val_grad2, = gradients_impl.gradients(
+        slice_val_grad, dy, grad_ys=self.var)
+    self.sess.run(assign)
     slice_val_grad_evaled, slice_val_grad2_evaled = (
-        self.test.evaluate([slice_val_grad, slice_val_grad2]))
-    analytic_grad2_evaled = self.test.evaluate(analytic_grad2)
+        self.sess.run([slice_val_grad, slice_val_grad2]))
+    analytic_grad2_evaled = analytic_grad2.eval()
     self.test.assertAllEqual(slice_val_grad2_evaled, analytic_grad2_evaled)
 
     # compute analytic gradient for slice
     np_val_grad = (2 * self.varnp * self.varnp)
     np_sliceval_grad = np.zeros(self.var.get_shape())
     if isinstance(spec, ops.Tensor):
-      spec = self.test.evaluate([spec])
+      spec = self.sess.run([spec])
     np_sliceval_grad[spec] = np_val_grad[spec]
     # verify gradient
     self.test.assertAllEqual(slice_val_grad_evaled, np_sliceval_grad)
 
 
-class StridedSliceGradTest(test_util.TensorFlowTestCase,
-                           parameterized.TestCase):
+class StridedSliceGradTest(test_util.TensorFlowTestCase):
   """Test that strided slice's custom gradient produces correct gradients."""
 
-  @parameterized.parameters(set((True, context.executing_eagerly())))
-  def testGradient(self, use_tape):
-    with test_util.device(use_gpu=True):
+  @test_util.run_v1_only("b/120545219")
+  def testGradient(self):
+    with self.session(use_gpu=True) as sess:
       var = variables.Variable(
           array_ops.reshape(
               math_ops.range(1, 97, 1, dtype=dtypes.float32), shape=(6, 4, 4)))
-      self.evaluate(var.initializer)
+      init = variables.global_variables_initializer()
+      sess.run(init)
 
       raw = np.array(range(1, 97, 1)).reshape((6, 4, 4))
-      grad = GradSliceChecker(self, var, raw, use_tape)
+      grad = GradSliceChecker(self, sess, var, raw)
       _ = grad[2:6:2, 1:3, 1:3]
       _ = grad[3:0:-2, 1:3, 1:3]
       _ = grad[3:0:-2, array_ops.newaxis, 1:3, 2, array_ops.newaxis]
       _ = grad[3:0:-2, 1:3, 2]
       _ = grad[:, -1, :]
       _ = grad[:, -2, :]
-      with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError),
-                                  "out of bounds"):
+      with self.assertRaisesRegex(ValueError, "out of bounds"):
         _ = grad[:, -200, :]
-      with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError),
-                                  "out of bounds"):
+      with self.assertRaisesRegex(ValueError, "out of bounds"):
         _ = grad[:, 200, :]
 
       # Test numpy array type mask
@@ -1034,64 +990,75 @@ class StridedSliceGradTest(test_util.TensorFlowTestCase,
       # Test tensor type mask
       _ = grad[ops.convert_to_tensor(raw) <= 76]
 
-  @parameterized.parameters(set((True, context.executing_eagerly())))
-  def testGradientZero(self, use_tape):
-    with test_util.device(use_gpu=True):
+  @test_util.run_v1_only("b/120545219")
+  def testGradientZero(self):
+    with self.session(use_gpu=True) as sess:
       var = variables.Variable(8.)
-      self.evaluate(var.initializer)
-      grad = GradSliceChecker(self, var, np.array(8), use_tape)
+      init = variables.global_variables_initializer()
+      sess.run(init)
+      grad = GradSliceChecker(self, sess, var, np.array(8))
       _ = grad[tuple()]
 
-  @parameterized.parameters(set((True, context.executing_eagerly())))
-  def testInt64Indices(self, use_tape):
-    with test_util.AbstractGradientTape(use_tape=use_tape) as tape:
+  @test_util.run_deprecated_v1
+  def testInt64Indices(self):
+    with self.session(use_gpu=True) as sess:
       a = math_ops.range(3, dtype=dtypes.float32)
-      tape.watch(a)
       index = constant_op.constant(1, dtype=dtypes.int64)
       b = 2. * a[index]
-    grad = tape.gradient(b, a)
-    self.assertAllEqual(self.evaluate(grad), [0., 2., 0.])
+      grad, = gradients_impl.gradients(b, a)
+      self.assertAllEqual(self.evaluate(grad), [0., 2., 0.])
 
 
 class StridedSliceGradTypeTest(test_util.TensorFlowTestCase):
   """Test varied index types and host located memory."""
 
+  @test_util.run_deprecated_v1
   def testHostVsDevice(self):
-    var2 = variables.Variable(
-        array_ops.reshape(
-            math_ops.cast(math_ops.range(1, 5, 1), dtypes.float32),
-            shape=(4, 1, 1)))
-    varshape = variables.Variable([6, 4, 4], dtype=dtypes.int32)
-    begin = constant_op.constant([0, 0, 0])
-    end = constant_op.constant([4, 1, 1])
-    strides = constant_op.constant([1, 1, 1])
-    foo = array_ops.strided_slice_grad(varshape, begin, end, strides, var2)
-    self.evaluate(var2.initializer)
-    self.evaluate(varshape.initializer)
-    self.evaluate(foo)
+    with self.session(use_gpu=True) as sess:
+      var2 = variables.Variable(
+          array_ops.reshape(
+              math_ops.cast(math_ops.range(1, 5, 1), dtypes.float32),
+              shape=(4, 1, 1)))
+      varshape = variables.Variable([6, 4, 4], dtype=dtypes.int32)
+      self.evaluate(variables.global_variables_initializer())
+      begin = constant_op.constant([0, 0, 0])
+      end = constant_op.constant([4, 1, 1])
+      strides = constant_op.constant([1, 1, 1])
+      foo = array_ops.strided_slice_grad(varshape, begin, end, strides, var2)
+      sess.run(foo)
 
+  @test_util.run_deprecated_v1
   def testInt64Shape(self):
-    original_dy = array_ops.reshape(
-        math_ops.cast(math_ops.range(1, 5, 1), dtypes.float32), shape=(4, 1, 1))
-    original_shape = constant_op.constant([6, 4, 4], dtype=dtypes.int64)
-    begin = constant_op.constant([0, 0, 0], dtype=dtypes.int64)
-    end = constant_op.constant([4, 1, 1], dtype=dtypes.int64)
-    strides = constant_op.constant([1, 1, 1], dtype=dtypes.int64)
-    dx = array_ops.strided_slice_grad(original_shape, begin, end, strides,
-                                      original_dy)
-    self.evaluate(dx)
-
-  def testMixedIndexTypes(self):
-    original_dy = array_ops.reshape(
-        math_ops.cast(math_ops.range(1, 5, 1), dtypes.float32), shape=(4, 1, 1))
-    original_shape = constant_op.constant([6, 4, 4], dtype=dtypes.int64)
-    begin = constant_op.constant([0, 0, 0], dtype=dtypes.int32)
-    end = constant_op.constant([4, 1, 1], dtype=dtypes.int64)
-    strides = constant_op.constant([1, 1, 1], dtype=dtypes.int64)
-    with self.assertRaises((TypeError, errors_impl.InvalidArgumentError)):
+    with self.session(use_gpu=True) as sess:
+      original_dy = array_ops.reshape(
+          math_ops.cast(math_ops.range(1, 5, 1), dtypes.float32),
+          shape=(4, 1, 1))
+      original_shape = constant_op.constant([6, 4, 4], dtype=dtypes.int64)
+      self.evaluate(variables.global_variables_initializer())
+      begin = constant_op.constant([0, 0, 0], dtype=dtypes.int64)
+      end = constant_op.constant([4, 1, 1], dtype=dtypes.int64)
+      strides = constant_op.constant([1, 1, 1], dtype=dtypes.int64)
       dx = array_ops.strided_slice_grad(original_shape, begin, end, strides,
                                         original_dy)
-      self.evaluate(dx)
+      sess.run(dx)
+
+  @test_util.run_deprecated_v1
+  def testMixedIndexTypes(self):
+    with self.session(use_gpu=True) as sess:
+      original_dy = array_ops.reshape(
+          math_ops.cast(math_ops.range(1, 5, 1), dtypes.float32),
+          shape=(4, 1, 1))
+      original_shape = constant_op.constant([6, 4, 4], dtype=dtypes.int64)
+      self.evaluate(variables.global_variables_initializer())
+      begin = constant_op.constant([0, 0, 0], dtype=dtypes.int32)
+      end = constant_op.constant([4, 1, 1], dtype=dtypes.int64)
+      strides = constant_op.constant([1, 1, 1], dtype=dtypes.int64)
+      with self.assertRaisesRegex(
+          TypeError, "Input 'begin' of 'StridedSliceGrad' Op has type int32"
+          " that does not match type int64 of argument 'shape'"):
+        dx = array_ops.strided_slice_grad(original_shape, begin, end, strides,
+                                          original_dy)
+        sess.run(dx)
 
 
 class BenchmarkSlice(object):
@@ -1166,16 +1133,16 @@ class StridedSliceAssignChecker(object):
     if self.tensor_type.is_complex:
       value -= 1j * value
 
-    with test_util.device(use_gpu=True):
+    with self.test.test_session(use_gpu=True) as sess:
       if self._use_resource:
         var = resource_variable_ops.ResourceVariable(self.x)
       else:
         var = variables.Variable(self.x)
-      self.test.evaluate(var.initializer)
-      val = self.test.evaluate(var[index].assign(value))
+      sess.run(variables.variables_initializer([var]))
+      val = sess.run(var[index].assign(value))
       # val_copy is used to check that tf.compat.v1.assign works equivalently
       # to the assign method above.
-      val_copy = self.test.evaluate(state_ops.assign(var[index], value))
+      val_copy = sess.run(state_ops.assign(var[index], value))
       valnp = np.copy(self.x_np)
       valnp[index] = np.array(value)
       self.test.assertAllEqual(val, valnp)
@@ -1220,34 +1187,48 @@ class SliceAssignTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     checker2[...] = 6  # ellipsis
     checker2[None] = [6]  # new axis
 
+  @test_util.run_deprecated_v1
   @test_util.disable_xla("b/123559667")
   def testSliceAssign(self):
     self.doTestSliceAssign(use_resource=False)
 
+  @test_util.run_deprecated_v1
   @test_util.disable_xla("b/123559667")
   def testSliceAssignResource(self):
     self.doTestSliceAssign(use_resource=True)
 
+  @test_util.run_v1_only("b/120545219")
+  def testUninitialized(self):
+    with self.assertRaisesRegex(
+        errors.FailedPreconditionError,
+        "Attempting to use uninitialized value Variable"):
+      with self.cached_session() as sess:
+        v = variables.VariableV1([1, 2])
+        sess.run(v[:].assign([1, 2]))
+
+  @test_util.run_v1_only("b/120545219")
   def testTypeError(self):
     init_val = constant_op.constant([1, 2], dtype=dtypes.int32)
     too_small_val = constant_op.constant([3, 4], dtype=dtypes.int8)
     too_large_val = constant_op.constant([3, 4], dtype=dtypes.int64)
     v = variables.VariableV1(init_val)
-    with self.assertRaises((ValueError, TypeError)):
-      self.evaluate(v[:].assign(too_small_val))
-    with self.assertRaises((ValueError, TypeError)):
-      self.evaluate(v[:].assign(too_large_val))
+    with self.assertRaises(TypeError):
+      v[:].assign(too_small_val)
+    with self.assertRaises(TypeError):
+      v[:].assign(too_large_val)
 
+  @test_util.run_deprecated_v1
   def testTypeErrorResource(self):
     init_val = constant_op.constant([1, 2], dtype=dtypes.int32)
     too_small_val = constant_op.constant([3, 4], dtype=dtypes.int8)
     too_large_val = constant_op.constant([3, 4], dtype=dtypes.int64)
     v = resource_variable_ops.ResourceVariable(init_val)
-    self.evaluate(v.initializer)
-    with self.assertRaises(ValueError):
-      self.evaluate(v[:].assign(too_large_val))
-    with self.assertRaises(ValueError):
-      self.evaluate(v[:].assign(too_small_val))
+    with self.cached_session() as sess:
+      self.evaluate(v.initializer)
+      with self.assertRaises(ValueError):
+        sess.run(v[:].assign(too_large_val))
+      with self.assertRaises(ValueError):
+        sess.run(v[:].assign(too_small_val))
 
   @test_util.disable_xla("b/123559667")
   @test_util.run_in_graph_and_eager_modes
@@ -1345,46 +1326,61 @@ class SequenceMaskTest(test_util.TensorFlowTestCase):
       with self.assertRaisesRegex(ValueError, "maxlen must be scalar"):
         array_ops.sequence_mask([10, 20], [10, 20])
 
+  @test_util.run_deprecated_v1
   def testOneDimensionalWithMaxlen(self):
-    res = array_ops.sequence_mask(constant_op.constant([1, 3, 2]), 5)
-    self.assertAllEqual(res.get_shape(), [3, 5])
-    self.assertAllEqual(
-        res,
-        [[True, False, False, False, False], [True, True, True, False, False],
-         [True, True, False, False, False]])
+    with self.cached_session():
+      res = array_ops.sequence_mask(constant_op.constant([1, 3, 2]), 5)
+      self.assertAllEqual(res.get_shape(), [3, 5])
+      self.assertAllEqual(
+          res,
+          [[True, False, False, False, False], [True, True, True, False, False],
+           [True, True, False, False, False]])
 
+  @test_util.run_deprecated_v1
   def testOneDimensionalDtypeWithoutMaxlen(self):
-    # test dtype and default maxlen:
-    res = array_ops.sequence_mask(
-        constant_op.constant([0, 1, 4]), dtype=dtypes.float32)
-    self.assertAllEqual(res.get_shape().as_list(), [3, 4])
-    self.assertAllEqual(
-        res, [[0.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [1.0, 1.0, 1.0, 1.0]])
+    with self.cached_session():
+      # test dtype and default maxlen:
+      res = array_ops.sequence_mask(
+          constant_op.constant([0, 1, 4]), dtype=dtypes.float32)
+      self.assertAllEqual(res.get_shape().as_list(), [3, 4])
+      self.assertAllEqual(
+          res,
+          [[0.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [1.0, 1.0, 1.0, 1.0]])
 
+  @test_util.run_deprecated_v1
   def testOneDimensionalWithoutMaxlen(self):
-    res = array_ops.sequence_mask(constant_op.constant([0, 1, 4]))
-    self.assertAllEqual(res.get_shape().as_list(), [3, 4])
-    self.assertAllEqual(res,
-                        [[False, False, False, False],
-                         [True, False, False, False], [True, True, True, True]])
+    with self.cached_session():
+      res = array_ops.sequence_mask(constant_op.constant([0, 1, 4]))
+      self.assertAllEqual(res.get_shape().as_list(), [3, 4])
+      self.assertAllEqual(
+          res, [[False, False, False, False], [True, False, False, False],
+                [True, True, True, True]])
 
+  @test_util.run_deprecated_v1
   def testTwoDimensional(self):
-    res = array_ops.sequence_mask(constant_op.constant([[1, 3, 2]]), 5)
-    self.assertAllEqual(res.get_shape(), [1, 3, 5])
-    self.assertAllEqual(
-        res,
-        [[[True, False, False, False, False], [True, True, True, False, False],
-          [True, True, False, False, False]]])
+    with self.cached_session():
+      res = array_ops.sequence_mask(constant_op.constant([[1, 3, 2]]), 5)
+      self.assertAllEqual(res.get_shape(), [1, 3, 5])
+      self.assertAllEqual(res, [[[True, False, False, False, False],
+                                 [True, True, True, False, False],
+                                 [True, True, False, False, False]]])
 
-    # test dtype and default maxlen:
-    res = array_ops.sequence_mask(
-        constant_op.constant([[0, 1, 4], [1, 2, 3]]), dtype=dtypes.float32)
-    self.assertAllEqual(res.get_shape().as_list(), [2, 3, 4])
-    self.assertAllEqual(
-        res,
-        [[[0.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [1.0, 1.0, 1.0, 1.0]],
-         [[1.0, 0.0, 0.0, 0.0], [1.0, 1.0, 0.0, 0.0], [1.0, 1.0, 1.0, 0.0]]])
+      # test dtype and default maxlen:
+      res = array_ops.sequence_mask(
+          constant_op.constant([[0, 1, 4], [1, 2, 3]]), dtype=dtypes.float32)
+      self.assertAllEqual(res.get_shape().as_list(), [2, 3, 4])
+      self.assertAllEqual(
+          res,
+          [[[0.0, 0.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0], [1.0, 1.0, 1.0, 1.0]],
+           [[1.0, 0.0, 0.0, 0.0], [1.0, 1.0, 0.0, 0.0], [1.0, 1.0, 1.0, 0.0]]])
 
+  @test_util.run_deprecated_v1
+  def testUnknownShape(self):
+    lengths = array_ops.placeholder(dtype=dtypes.int32)
+    res = array_ops.sequence_mask(lengths)
+    self.assertEqual(res.shape, None)
+
+  @test_util.run_deprecated_v1
   def testDtypes(self):
 
     def check_dtypes(lengths_dtype, maxlen_dtype):
@@ -1397,10 +1393,11 @@ class SequenceMaskTest(test_util.TensorFlowTestCase):
           [[True, False, False, False, False], [True, True, True, False, False],
            [True, True, False, False, False]])
 
-    check_dtypes(dtypes.int32, dtypes.int32)
-    check_dtypes(dtypes.int32, dtypes.int64)
-    check_dtypes(dtypes.int64, dtypes.int32)
-    check_dtypes(dtypes.int64, dtypes.int64)
+    with self.cached_session():
+      check_dtypes(dtypes.int32, dtypes.int32)
+      check_dtypes(dtypes.int32, dtypes.int64)
+      check_dtypes(dtypes.int64, dtypes.int32)
+      check_dtypes(dtypes.int64, dtypes.int64)
 
   def testOutputDtype(self):
 
@@ -1434,6 +1431,7 @@ class SequenceMaskTest(test_util.TensorFlowTestCase):
 class ConcatSliceResourceTest(test_util.TensorFlowTestCase):
 
   @test_util.run_in_graph_and_eager_modes
+  @test_util.run_deprecated_v1
   def testConcatSlice(self):
     r1 = test_ops.stub_resource_handle_op(container="a", shared_name="b")
     r2 = test_ops.stub_resource_handle_op(container="a", shared_name="c")
@@ -1512,13 +1510,15 @@ class PadTest(test_util.TensorFlowTestCase):
 
 class InvertPermutationTest(test_util.TensorFlowTestCase):
 
+  @test_util.run_deprecated_v1
   def testInvertPermutation(self):
     for dtype in [dtypes.int32, dtypes.int64]:
-      with self.subTest(dtype=dtype, use_gpu=True):
-        x = constant_op.constant([3, 4, 0, 2, 1], dtype=dtype)
-        y = array_ops.invert_permutation(x)
-        self.assertAllEqual(y.get_shape(), [5])
-        self.assertAllEqual(y, [2, 4, 3, 0, 1])
+      with self.subTest(dtype=dtype):
+        with self.cached_session(use_gpu=True):
+          x = constant_op.constant([3, 4, 0, 2, 1], dtype=dtype)
+          y = array_ops.invert_permutation(x)
+          self.assertAllEqual(y.get_shape(), [5])
+          self.assertAllEqual(y, [2, 4, 3, 0, 1])
 
 
 class UnravelIndexTest(test_util.TensorFlowTestCase):
@@ -1557,43 +1557,50 @@ class UnravelIndexTest(test_util.TensorFlowTestCase):
 
 class GuaranteeConstOpTest(test_util.TensorFlowTestCase):
 
+  @test_util.run_deprecated_v1
   def testSimple(self):
-    a = array_ops.constant(10)
-    guarantee_a = array_ops.guarantee_const(a)
-    self.assertEqual(10, self.evaluate(guarantee_a))
+    with self.cached_session():
+      a = array_ops.constant(10)
+      guarantee_a = array_ops.guarantee_const(a)
+      self.assertEqual(10, self.evaluate(guarantee_a))
 
+  @test_util.run_deprecated_v1
   def testVariables(self):
-    for use_resource in [False, True]:
-      with self.subTest(use_resource=use_resource):
-        a = variable_scope.get_variable(
-            "var_{}".format(use_resource), [],
-            initializer=init_ops.constant_initializer(10.0),
-            use_resource=use_resource)
-        guarantee_a = array_ops.guarantee_const(a)
-        self.evaluate(a.initializer)
-        self.assertEqual(10.0, self.evaluate(guarantee_a))
+    with self.cached_session() as sess:
+      for use_resource in [False, True]:
+        with self.subTest(use_resource=use_resource):
+          a = variable_scope.get_variable(
+              "var_{}".format(use_resource), [],
+              initializer=init_ops.constant_initializer(10.0),
+              use_resource=use_resource)
+          guarantee_a = array_ops.guarantee_const(a)
+          self.evaluate(variables.global_variables_initializer())
+          self.assertEqual(10.0, self.evaluate(guarantee_a))
 
+  @test_util.run_deprecated_v1
   def testResourceRejection(self):
-    with ops.device("/cpu:0"):
+    with self.cached_session() as sess:
       a = variable_scope.get_variable(
           "resource_var", [],
           initializer=init_ops.constant_initializer(10.0),
           use_resource=True)
-    with self.assertRaisesWithPredicateMatch(errors.InvalidArgumentError,
-                                             "cannot be a resource variable"):
       guarantee_a = array_ops.guarantee_const(a.handle)
-      self.evaluate(a.initializer)
-      self.evaluate(guarantee_a)
+      self.evaluate(variables.global_variables_initializer())
+      with self.assertRaisesWithPredicateMatch(errors.InvalidArgumentError,
+                                               "cannot be a resource variable"):
+        self.evaluate(guarantee_a)
 
 
 class SnapshotOpTest(test_util.TensorFlowTestCase):
 
+  @test_util.run_deprecated_v1
   def testInvertPermutation(self):
     for dtype in [dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64]:
-      with self.subTest(dtype=dtype, use_gpu=True):
-        x = constant_op.constant([0, 1, 2, 3], dtype=dtype)
-        y = gen_array_ops.snapshot(x)
-        self.assertAllEqual(y, [0, 1, 2, 3])
+      with self.subTest(dtype=dtype):
+        with self.cached_session(use_gpu=True):
+          x = constant_op.constant([0, 1, 2, 3], dtype=dtype)
+          y = gen_array_ops.snapshot(x)
+          self.assertAllEqual(y, [0, 1, 2, 3])
 
 
 @test_util.run_all_in_graph_and_eager_modes
@@ -2052,6 +2059,7 @@ class BatchGatherNdTest(test_util.TensorFlowTestCase):
     with self.assertRaises(ValueError):
       array_ops.batch_gather_nd(params=params, indices=indices, batch_dims=4)
 
+  @test_util.run_deprecated_v1
   def testNoneBatchDimensions(self):
     """Tests gather_nd works with None dimensions."""
     shapes = []
@@ -2078,18 +2086,19 @@ class BatchGatherNdTest(test_util.TensorFlowTestCase):
         params_ph_shape[i] = None
         indices_ph_shape[i] = None
 
-      @def_function.function
-      def func(params, indices):
-        return array_ops.batch_gather_nd(
-            params=params, indices=indices, batch_dims=batch_dims)  # pylint: disable=cell-var-from-loop
+      params = array_ops.placeholder(dtypes.float32, shape=params_ph_shape)
+      indices = array_ops.placeholder(dtypes.int32, shape=indices_ph_shape)
+      out = array_ops.batch_gather_nd(
+          params=params, indices=indices, batch_dims=batch_dims)
 
-      f = func.get_concrete_function(
-          tensor_spec.TensorSpec(params_ph_shape, dtypes.float32),
-          tensor_spec.TensorSpec(indices_ph_shape, dtypes.int32))
-
-      params_val = np.ones(dtype=np.float32, shape=params_shape)
-      indices_val = np.ones(dtype=np.int32, shape=indices_shape)
-      res = f(params_val, indices_val)
+      with self.cached_session() as sess:
+        params_val = np.ones(dtype=np.float32, shape=params_shape)
+        indices_val = np.ones(dtype=np.int32, shape=indices_shape)
+        res = sess.run(
+            out, feed_dict={
+                params: params_val,
+                indices: indices_val
+            })
       row_ndims = len(params_shape) - batch_dims - indices_shape[-1]
       expected_out_shape = indices_shape[:-1]
       if row_ndims > 0:
@@ -2097,6 +2106,16 @@ class BatchGatherNdTest(test_util.TensorFlowTestCase):
 
       self.assertSequenceEqual(res.shape, expected_out_shape)
 
+  @test_util.run_deprecated_v1
+  def testUnknownIndices(self):
+    """Tests whether indices with unknown rank works correctly."""
+    params = constant_op.constant(((0, 1, 2),))
+    indices = array_ops.placeholder(dtypes.int32)
+    gather_nd_t = array_ops.gather_nd(params, indices, batch_dims=1)
+    shape = gather_nd_t.get_shape()
+    self.assertEqual(None, shape.ndims)
+    self.assertEqual(None, tensor_shape.dimension_value(shape[0]))
+
 
 @test_util.run_all_in_graph_and_eager_modes
 class RepeatTest(test_util.TensorFlowTestCase, parameterized.TestCase):
diff --git a/tensorflow/python/kernel_tests/v1_compat_tests/BUILD b/tensorflow/python/kernel_tests/v1_compat_tests/BUILD
index e5512eb133e..bd9c02d8101 100644
--- a/tensorflow/python/kernel_tests/v1_compat_tests/BUILD
+++ b/tensorflow/python/kernel_tests/v1_compat_tests/BUILD
@@ -42,15 +42,3 @@ cuda_py_test(
         "//tensorflow/python:session_ops",
     ],
 )
-
-cuda_py_test(
-    name = "array_ops_test",
-    size = "small",
-    srcs = ["array_ops_test.py"],
-    deps = [
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:framework_test_lib",
-    ],
-)
diff --git a/tensorflow/python/kernel_tests/v1_compat_tests/array_ops_test.py b/tensorflow/python/kernel_tests/v1_compat_tests/array_ops_test.py
deleted file mode 100644
index 2203c4b0723..00000000000
--- a/tensorflow/python/kernel_tests/v1_compat_tests/array_ops_test.py
+++ /dev/null
@@ -1,88 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for array_ops that only work in V1."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.framework import test_util
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import variables
-from tensorflow.python.platform import test as test_lib
-
-
-class ReverseV2Test(test_util.TensorFlowTestCase):
-
-  # Pure shape inference test only V1
-  @test_util.run_deprecated_v1
-  def testUnknownDims(self):
-    reverse_v2 = array_ops.reverse_v2
-    data_t = array_ops.placeholder(dtypes.float32)
-    axis_known_t = array_ops.placeholder(dtypes.int32, shape=[3])
-    reverse_known_t = reverse_v2(data_t, axis_known_t)
-    # Unlike V1 we cannot know this anymore
-    self.assertIsNone(reverse_known_t.get_shape().ndims)
-
-    axis_unknown_t = array_ops.placeholder(dtypes.int32)
-    reverse_unknown_t = reverse_v2(data_t, axis_unknown_t)
-    self.assertIs(None, reverse_unknown_t.get_shape().ndims)
-
-    data_2d_t = array_ops.placeholder(dtypes.float32, shape=[None, None])
-    axis_2d_t = array_ops.placeholder(dtypes.int32, shape=[3])
-    reverse_2d_t = reverse_v2(data_2d_t, axis_2d_t)
-    self.assertEqual(2, reverse_2d_t.get_shape().ndims)
-
-
-class SequenceMaskTest(test_util.TensorFlowTestCase):
-
-  # Pure shape inference test only V1
-  @test_util.run_deprecated_v1
-  def testUnknownShape(self):
-    lengths = array_ops.placeholder(dtype=dtypes.int32)
-    res = array_ops.sequence_mask(lengths)
-    self.assertEqual(res.shape, None)  # pylint: disable=g-generic-assert
-
-
-class BatchGatherNdTest(test_util.TensorFlowTestCase):
-
-  # Pure shape inference test only V1
-  @test_util.run_deprecated_v1
-  def testUnknownIndices(self):
-    """Tests whether indices with unknown rank works correctly."""
-    params = constant_op.constant(((0, 1, 2),))
-    indices = array_ops.placeholder(dtypes.int32)
-    gather_nd_t = array_ops.gather_nd(params, indices, batch_dims=1)
-    shape = gather_nd_t.get_shape()
-    self.assertIsNone(shape.ndims)
-    self.assertIsNone(tensor_shape.dimension_value(shape[0]))
-
-
-class SliceAssignTest(test_util.TensorFlowTestCase):
-
-  @test_util.run_v1_only("Variables need initialization only in V1")
-  def testUninitialized(self):
-    with self.assertRaisesRegex(
-        errors.FailedPreconditionError,
-        "Attempting to use uninitialized value Variable"):
-      v = variables.VariableV1([1, 2])
-      self.evaluate(v[:].assign([1, 2]))
-
-
-if __name__ == "__main__":
-  test_lib.main()

From edc060801f9e049e67933a8fbf5059d4bced7f7a Mon Sep 17 00:00:00 2001
From: Jacques Pienaar <jpienaar@google.com>
Date: Sun, 13 Dec 2020 17:25:16 -0800
Subject: [PATCH 28/60] Split out dialect hooks into separate targets

This allows depending on the TF dialect without also linking in the hooks, for example when one wants the TF dialect but not constant folding via the fallback hook.

PiperOrigin-RevId: 347295194
Change-Id: Iaf5af9c4c0c0ed00e5cc91ecc39cc4043c5ca0b6
---
 tensorflow/compiler/mlir/BUILD                |  1 -
 tensorflow/compiler/mlir/lite/BUILD           |  3 +-
 tensorflow/compiler/mlir/tensorflow/BUILD     | 43 +++++++++++++------
 .../tensorflow/transforms/constant_fold.cc    |  1 -
 .../tensorflow/transforms/constant_fold.h     |  6 +--
 .../transforms/decode_attributes_hook.cc      |  1 -
 tensorflow/compiler/mlir/tfjs/BUILD           |  3 +-
 7 files changed, 36 insertions(+), 22 deletions(-)

diff --git a/tensorflow/compiler/mlir/BUILD b/tensorflow/compiler/mlir/BUILD
index 405471ab1e4..15a10c31237 100644
--- a/tensorflow/compiler/mlir/BUILD
+++ b/tensorflow/compiler/mlir/BUILD
@@ -109,7 +109,6 @@ cc_library(
         "//tensorflow/compiler/mlir/tensorflow:compile_mlir_util_pass",
         "//tensorflow/compiler/mlir/tensorflow:tensorflow_passes",
         "//tensorflow/compiler/mlir/tensorflow:tensorflow_test_passes",
-        "//tensorflow/compiler/mlir/tensorflow:tf_dialect_passes",
         "//tensorflow/compiler/mlir/tensorflow:tf_legalize_hlo",
         "//tensorflow/compiler/mlir/tfjs:tensorflow_js_passes",
         "//tensorflow/compiler/mlir/tosa:tf_passes",
diff --git a/tensorflow/compiler/mlir/lite/BUILD b/tensorflow/compiler/mlir/lite/BUILD
index 664dfe0e3ba..72e5799c5c9 100644
--- a/tensorflow/compiler/mlir/lite/BUILD
+++ b/tensorflow/compiler/mlir/lite/BUILD
@@ -937,8 +937,7 @@ cc_library(
         "//tensorflow/compiler/mlir/tensorflow:decode_constant_pass",
         "//tensorflow/compiler/mlir/tensorflow:error_util",
         "//tensorflow/compiler/mlir/tensorflow:mlir_roundtrip_flags",
-        "//tensorflow/compiler/mlir/tensorflow:tf_dialect_lib",
-        "//tensorflow/compiler/mlir/tensorflow:tf_dialect_passes",
+        "//tensorflow/compiler/mlir/tensorflow:tf_dialect_hooks",
         "//tensorflow/compiler/mlir/tensorflow:translate_lib",
         "//tensorflow/core:framework",
         "//tensorflow/core:protos_all_cc",
diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD
index 721780856d6..82b00f303a3 100644
--- a/tensorflow/compiler/mlir/tensorflow/BUILD
+++ b/tensorflow/compiler/mlir/tensorflow/BUILD
@@ -1106,7 +1106,6 @@ cc_library(
         ":mlir_roundtrip_flags",
         ":tensorflow",
         ":tensorflow_attributes",
-        ":tensorflow_passes",
         ":tensorflow_types",
         ":tf_saved_model_passes",
         ":translate_utils",
@@ -1450,27 +1449,21 @@ cc_library(
 )
 
 cc_library(
-    name = "tf_dialect_passes",
+    name = "tf_constant_fallback_hook",
     srcs = [
         "transforms/constant_fold.cc",
-        "transforms/decode_attributes_hook.cc",
     ],
     hdrs = [
         "transforms/constant_fold.h",
     ],
     deps = [
-        ":convert_tensor",
         ":decode_constant_pass",
         ":eval_util",
         ":tensorflow",
         ":tensorflow_traits",
         ":tensorflow_types",
         "//tensorflow/c:tf_status",
-        "//tensorflow/c/eager:c_api",
-        "//tensorflow/core:framework",
         "//tensorflow/core:lib",
-        "//tensorflow/stream_executor",
-        "//tensorflow/stream_executor/lib",
         "@llvm-project//llvm:Support",
         "@llvm-project//mlir:IR",
         "@llvm-project//mlir:SideEffects",
@@ -1480,13 +1473,39 @@ cc_library(
 )
 
 cc_library(
-    name = "tf_dialect_lib",
+    name = "tf_decode_attributes_hook",
+    srcs = [
+        "transforms/decode_attributes_hook.cc",
+    ],
     deps = [
-        ":tf_dialect_passes",
+        ":convert_tensor",
+        ":decode_constant_pass",
+        ":tensorflow",
+        "//tensorflow/core:framework",
+        "//tensorflow/stream_executor",
+        "//tensorflow/stream_executor/lib",
+        "@llvm-project//llvm:Support",
+        "@llvm-project//mlir:IR",
+        "@llvm-project//mlir:Support",
+    ],
+    alwayslink = 1,
+)
+
+cc_library(
+    name = "tf_dialect_hooks",
+    deps = [
+        ":tf_constant_fallback_hook",
+        ":tf_decode_attributes_hook",
         "@llvm-project//mlir:AllPassesAndDialectsNoRegistration",
     ],
 )
 
+# TODO(jpienaar): Remove post updating all.
+alias(
+    name = "tf_dialect_lib",
+    actual = ":tf_dialect_hooks",
+)
+
 cc_library(
     name = "tf_graph_optimization_pass",
     srcs = ["transforms/tf_graph_optimization_pass.cc"],
@@ -1702,8 +1721,8 @@ cc_library(
     name = "compile_mlir_util",
     hdrs = ["utils/compile_mlir_util.h"],
     deps = COMPILE_MLIR_UTIL_DEPS + [
-        "compile_mlir_util_no_tf_dialect_passes",
-        ":tf_dialect_passes",
+        ":compile_mlir_util_no_tf_dialect_passes",
+        ":tf_dialect_hooks",
     ],
 )
 
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.cc b/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.cc
index 31cfc5ebf9c..a3c487f6378 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.cc
@@ -20,7 +20,6 @@ limitations under the License.
 #include "mlir/IR/OpDefinition.h"  // from @llvm-project
 #include "mlir/Interfaces/SideEffectInterfaces.h"  // from @llvm-project
 #include "mlir/Support/LogicalResult.h"  // from @llvm-project
-#include "tensorflow/c/eager/c_api.h"
 #include "tensorflow/c/tf_status.h"
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h"
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.h b/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.h
index 887eea745e7..54f296dcb2f 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.h
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.h
@@ -25,9 +25,9 @@ limitations under the License.
 namespace mlir {
 namespace TF {
 
-LogicalResult ConstantFoldFallbackHook(
-    Operation *inst, ArrayRef<Attribute> operands,
-    SmallVectorImpl<OpFoldResult> &results);  // NOLINT
+LogicalResult ConstantFoldFallbackHook(Operation *inst,
+                                       ArrayRef<Attribute> operands,
+                                       SmallVectorImpl<OpFoldResult> &results);
 
 }  // namespace TF
 }  // namespace mlir
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/decode_attributes_hook.cc b/tensorflow/compiler/mlir/tensorflow/transforms/decode_attributes_hook.cc
index 09fac6e0706..9dbf332fc67 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/decode_attributes_hook.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/decode_attributes_hook.cc
@@ -23,7 +23,6 @@ limitations under the License.
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/Support/LogicalResult.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
-#include "tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.h"
 #include "tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.h"
 #include "tensorflow/core/framework/logging.h"
 #include "tensorflow/stream_executor/lib/statusor.h"
diff --git a/tensorflow/compiler/mlir/tfjs/BUILD b/tensorflow/compiler/mlir/tfjs/BUILD
index a337dc02a9e..66b9a5493ce 100644
--- a/tensorflow/compiler/mlir/tfjs/BUILD
+++ b/tensorflow/compiler/mlir/tfjs/BUILD
@@ -175,8 +175,7 @@ cc_library(
         "//tensorflow/compiler/mlir/tensorflow",
         "//tensorflow/compiler/mlir/tensorflow:decode_constant_pass",
         "//tensorflow/compiler/mlir/tensorflow:error_util",
-        "//tensorflow/compiler/mlir/tensorflow:tf_dialect_lib",
-        "//tensorflow/compiler/mlir/tensorflow:tf_dialect_passes",
+        "//tensorflow/compiler/mlir/tensorflow:tf_dialect_hooks",
         "//tensorflow/compiler/mlir/tensorflow:translate_cl_options",
         "//tensorflow/compiler/mlir/tensorflow:translate_lib",
         "//tensorflow/core:framework",

From 1e8c13a7f079a33f17e5f14ac12e13781e52df3c Mon Sep 17 00:00:00 2001
From: Tiezhen WANG <wangtz@google.com>
Date: Sun, 13 Dec 2020 18:43:06 -0800
Subject: [PATCH 29/60] TFLite Python API: allow float64, uint64 and complex128

I was confused by the error message saying that the interpreter got an UNKNOWN type, when the actual type turned out to be float64.

Also removes a confusing comment: since the input type is `int`, not an `enum`, -Wswitch-enum won't help.

PiperOrigin-RevId: 347300840
Change-Id: I732731871e969ae53e88c9cd7ffffdbd1ac8e314
---
 tensorflow/lite/python/interpreter_wrapper/numpy.cc | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tensorflow/lite/python/interpreter_wrapper/numpy.cc b/tensorflow/lite/python/interpreter_wrapper/numpy.cc
index 8e6e9e80b22..c75a3180161 100644
--- a/tensorflow/lite/python/interpreter_wrapper/numpy.cc
+++ b/tensorflow/lite/python/interpreter_wrapper/numpy.cc
@@ -73,6 +73,8 @@ TfLiteType TfLiteTypeFromPyType(int py_type) {
       return kTfLiteFloat32;
     case NPY_FLOAT16:
       return kTfLiteFloat16;
+    case NPY_FLOAT64:
+      return kTfLiteFloat64;
     case NPY_INT32:
       return kTfLiteInt32;
     case NPY_INT16:
@@ -83,6 +85,8 @@ TfLiteType TfLiteTypeFromPyType(int py_type) {
       return kTfLiteInt8;
     case NPY_INT64:
       return kTfLiteInt64;
+    case NPY_UINT64:
+      return kTfLiteUInt64;
     case NPY_BOOL:
       return kTfLiteBool;
     case NPY_OBJECT:
@@ -91,7 +95,8 @@ TfLiteType TfLiteTypeFromPyType(int py_type) {
       return kTfLiteString;
     case NPY_COMPLEX64:
       return kTfLiteComplex64;
-      // Avoid default so compiler errors created when new types are made.
+    case NPY_COMPLEX128:
+      return kTfLiteComplex128;
   }
   return kTfLiteNoType;
 }

From 9697081dac25ef5c3e95b98b6e1113261611a4b9 Mon Sep 17 00:00:00 2001
From: Christian Sigg <csigg@google.com>
Date: Sun, 13 Dec 2020 22:58:40 -0800
Subject: [PATCH 30/60] Use OpState::operator->() to get to member functions in
 Operation so we can remove the corresponding methods from OpState.

PiperOrigin-RevId: 347322042
Change-Id: I02b32db7aba1c33e4dd5510a294fcfff7b122d60
---
 .../lite/quantization/quantization_driver.cc  |  2 +-
 .../compiler/mlir/lite/utils/lstm_utils.cc    |  6 +++---
 .../compiler/mlir/tensorflow/ir/tf_ops_a_m.cc | 21 +++++++++++--------
 .../mlir/tensorflow/ir/tf_ops_helpers.inc     |  6 +++---
 .../compiler/mlir/tensorflow/ir/tf_ops_n_z.cc |  2 +-
 .../mlir/tensorflow/ir/tf_saved_model.cc      |  8 +++----
 .../mlir/tensorflow/translate/import_model.cc | 19 +++++++++--------
 .../mlir/tensorflow/utils/translate_utils.cc  |  6 +++---
 .../tensorflow/utils/xla_sharding_util.cc     |  5 +++--
 tensorflow/compiler/mlir/tfr/ir/tfr_ops.cc    |  4 ++--
 .../compiler/mlir/tfr/passes/decompose.cc     |  4 ++--
 .../compiler/mlir/tfr/passes/raise_to_tf.cc   |  2 +-
 tensorflow/compiler/mlir/tfr/utils/utils.cc   |  2 +-
 .../mlir/xla/transforms/legalize_tf.cc        |  6 +++---
 14 files changed, 49 insertions(+), 44 deletions(-)

diff --git a/tensorflow/compiler/mlir/lite/quantization/quantization_driver.cc b/tensorflow/compiler/mlir/lite/quantization/quantization_driver.cc
index d847a7d52e6..831a67078e1 100644
--- a/tensorflow/compiler/mlir/lite/quantization/quantization_driver.cc
+++ b/tensorflow/compiler/mlir/lite/quantization/quantization_driver.cc
@@ -532,7 +532,7 @@ void QuantizationDriver::QuantizeValue(Value value, QuantParams params,
   // quantization pass. These ops can be removed without losing original
   // program accuracy.
   // TODO(fengliuai): make the attribute being part of op definition.
-  quantize.setAttr(kVolatileOpAttrName, builder_.getUnitAttr());
+  quantize->setAttr(kVolatileOpAttrName, builder_.getUnitAttr());
 
   // `original_result` has a use to `quantize`, so this will replace that use
   // by the result of `dequantize`. Remember to reset that use afterwards
diff --git a/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc b/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc
index 357079c561b..1a503675f45 100644
--- a/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc
+++ b/tensorflow/compiler/mlir/lite/utils/lstm_utils.cc
@@ -438,7 +438,7 @@ LogicalResult ConvertLSTMCellSimpleToFusedLSTM::RewriteFunc() {
 }
 
 LogicalResult ConvertLSTMCellSimpleToFusedLSTM::InitializeFromFuncAttributes() {
-  auto attr = fused_func_op_.getAttrOfType<StringAttr>(kTFImplements);
+  auto attr = fused_func_op_->getAttrOfType<StringAttr>(kTFImplements);
   if (!attr) {
     return fused_func_op_.emitError()
            << "Invalid function attribute, expected " << kTFImplements
@@ -639,7 +639,7 @@ LogicalResult ConvertKerasLSTMLayer(mlir::FuncOp func_op, OpBuilder* builder) {
 
   // TFL lstm only supports time-majored inputs, so if it's not time-majored,
   // we will transpose the inputs and outputs.
-  auto time_major_attr = func_op.getAttrOfType<BoolAttr>("tf.time_major");
+  auto time_major_attr = func_op->getAttrOfType<BoolAttr>("tf.time_major");
   if (time_major_attr == nullptr) return failure();
 
   bool time_majored = time_major_attr.getValue();
@@ -654,7 +654,7 @@ LogicalResult ConvertKerasLSTMLayer(mlir::FuncOp func_op, OpBuilder* builder) {
 
   // Handle go_backwards:
   // LSTM in Keras semantic will reverse the input sequence if it's go_backwards
-  auto go_backwards_attr = func_op.getAttrOfType<BoolAttr>("tf.go_backwards");
+  auto go_backwards_attr = func_op->getAttrOfType<BoolAttr>("tf.go_backwards");
 
   if (go_backwards_attr != nullptr && go_backwards_attr.getValue()) {
     int time_dim = time_majored ? 0 : 1;
diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc
index 513f8338343..93e0113ce4a 100644
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.cc
@@ -1479,9 +1479,10 @@ LogicalResult Conv2DOp::UpdateDataFormat(StringRef data_format) {
   if (failed(::mlir::TF::UpdateDataFormat(data_format, this))) return failure();
 
   // Update convolution attributes.
-  setAttr("dilations", ShuffleArrayAttr(dilations(), perm));
-  setAttr("strides", ShuffleArrayAttr(strides(), perm));
-  setAttr("explicit_paddings", ShuffleArrayAttr(explicit_paddings(), perm, 2));
+  (*this)->setAttr("dilations", ShuffleArrayAttr(dilations(), perm));
+  (*this)->setAttr("strides", ShuffleArrayAttr(strides(), perm));
+  (*this)->setAttr("explicit_paddings",
+                   ShuffleArrayAttr(explicit_paddings(), perm, 2));
 
   return success();
 }
@@ -1553,9 +1554,10 @@ LogicalResult Conv2DBackpropFilterOp::UpdateDataFormat(StringRef data_format) {
   if (failed(::mlir::TF::UpdateDataFormat(data_format, this))) return failure();
 
   // Update convolution attributes.
-  setAttr("dilations", ShuffleArrayAttr(dilations(), perm));
-  setAttr("strides", ShuffleArrayAttr(strides(), perm));
-  setAttr("explicit_paddings", ShuffleArrayAttr(explicit_paddings(), perm, 2));
+  (*this)->setAttr("dilations", ShuffleArrayAttr(dilations(), perm));
+  (*this)->setAttr("strides", ShuffleArrayAttr(strides(), perm));
+  (*this)->setAttr("explicit_paddings",
+                   ShuffleArrayAttr(explicit_paddings(), perm, 2));
 
   // Permute filter sizes operand.
   OpBuilder builder(getOperation());
@@ -1618,9 +1620,10 @@ LogicalResult Conv2DBackpropInputOp::UpdateDataFormat(StringRef data_format) {
   if (failed(::mlir::TF::UpdateDataFormat(data_format, this))) return failure();
 
   // Update convolution attributes.
-  setAttr("dilations", ShuffleArrayAttr(dilations(), perm));
-  setAttr("strides", ShuffleArrayAttr(strides(), perm));
-  setAttr("explicit_paddings", ShuffleArrayAttr(explicit_paddings(), perm, 2));
+  (*this)->setAttr("dilations", ShuffleArrayAttr(dilations(), perm));
+  (*this)->setAttr("strides", ShuffleArrayAttr(strides(), perm));
+  (*this)->setAttr("explicit_paddings",
+                   ShuffleArrayAttr(explicit_paddings(), perm, 2));
 
   // Permute input sizes operand.
   OpBuilder builder(getOperation());
diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc
index de8bc8311b7..dddb9bca67f 100644
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_helpers.inc
@@ -370,7 +370,7 @@ LogicalResult UpdateDataFormat(StringRef data_format, Op *op) {
   if (perm.empty()) return failure();
 
   // Update data format attribute.
-  op->setAttr("data_format", StringAttr::get(data_format, op->getContext()));
+  (*op)->setAttr("data_format", StringAttr::get(data_format, op->getContext()));
 
   // Update types for all layout sensitive results.
   auto layout_sensitive = cast<LayoutSensitiveInterface>(op->getOperation());
@@ -421,12 +421,12 @@ LogicalResult FoldOperandsPermutation(
       GetDataFormatPermutation(op->data_format(), target_data_format);
   if (reverse_permutation.empty()) return failure();
 
-  op->setAttr("data_format", StringAttr::get(target_data_format, context));
+  (*op)->setAttr("data_format", StringAttr::get(target_data_format, context));
 
   for (auto pair : shuffle_attrs) {
     StringRef attr_name = pair.first;
     ArrayAttr attr_value = pair.second;
-    op->setAttr(attr_name, ShuffleArrayAttr(attr_value, reverse_permutation));
+    (*op)->setAttr(attr_name, ShuffleArrayAttr(attr_value, reverse_permutation));
   }
 
   auto fold = cast<FoldOperandsTransposeInterface>(op->getOperation());
diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc
index 5d681295f61..0208e377d19 100644
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_ops_n_z.cc
@@ -401,7 +401,7 @@ static LogicalResult Verify(ParseExampleV2Op op) {
 template <class OpClass>
 static LogicalResult VerifyPartitionedCall(OpClass op) {
   auto module = op->template getParentOfType<ModuleOp>();
-  SymbolRefAttr func = op.getAttr("f").template cast<SymbolRefAttr>();
+  SymbolRefAttr func = op->getAttr("f").template cast<SymbolRefAttr>();
 
   auto function =
       dyn_cast_or_null<FuncOp>(SymbolTable::lookupSymbolIn(module, func));
diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc
index 85cb8edb8c7..3edcbf505dd 100644
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc
@@ -342,7 +342,7 @@ LogicalResult VerifyExportedFunc(FuncOp func) {
       continue;
     }
     if (func.getArgAttr(i, "tf.resource_name")) {
-      if (module.getAttr("tf_saved_model.under_construction")) continue;
+      if (module->getAttr("tf_saved_model.under_construction")) continue;
       return func.emitError() << "'tf.resource_name' attribute is not allowed "
                                  "unless it is being under construction";
     }
@@ -355,7 +355,7 @@ LogicalResult VerifyExportedFunc(FuncOp func) {
     if (auto attr = func.getArgAttrOfType<FlatSymbolRefAttr>(
             i, "tf_saved_model.bound_input")) {
       if (!unique_bound_inputs.insert(attr.getValue()).second) {
-        if (module.getAttr("tf_saved_model.under_construction")) continue;
+        if (module->getAttr("tf_saved_model.under_construction")) continue;
         return func.emitError()
                << "duplicate 'tf_saved_model.bound_input' binding";
       }
@@ -431,7 +431,7 @@ bool IsExported(Operation *op) {
 }
 
 bool HasTfSavedModelSemantics(ModuleOp module) {
-  return module.getAttr("tf_saved_model.semantics") != nullptr;
+  return module->getAttr("tf_saved_model.semantics") != nullptr;
 }
 
 Operation *LookupBoundInput(FuncOp func, int arg_index,
@@ -483,7 +483,7 @@ class OptimizeSessionInitializerPattern
     if (to_keep.empty())
       rewriter.eraseOp(op);
     else
-      op.setAttr("initializers", rewriter.getArrayAttr(to_keep));
+      op->setAttr("initializers", rewriter.getArrayAttr(to_keep));
 
     return success();
   }
diff --git a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc
index 3099554f5c7..d0ae9dc8aee 100644
--- a/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc
+++ b/tensorflow/compiler/mlir/tensorflow/translate/import_model.cc
@@ -3064,7 +3064,7 @@ Status CreateSavedModelIR(
             /*executor_type=*/builder.getStringAttr(""));
         body_builder.create<mlir::ReturnOp>(func.getLoc(), call.getResults());
       }
-      func.setAttr(
+      func->setAttr(
           "tf_saved_model.exported_names",
           builder.getStrArrayAttr(object_names.GetExportedNames(node_id)));
       const SavedConcreteFunction& concrete_function =
@@ -3162,7 +3162,7 @@ Status CreateSavedModelIR(
           value_attr,
           /*type=*/mlir::TypeAttr::get(type),
           /*is_mutable=*/builder.getUnitAttr());
-      op.setAttr(
+      op->setAttr(
           "tf_saved_model.exported_names",
           builder.getStrArrayAttr(object_names.GetExportedNames(node_id)));
     } else if (object.kind_case() == SavedObject::kConstant) {
@@ -3182,13 +3182,13 @@ Status CreateSavedModelIR(
           value_attr,
           /*type=*/mlir::TypeAttr::get(value_attr.Attribute::getType()),
           /*is_mutable=*/nullptr);
-      op.setAttr(
+      op->setAttr(
           "tf_saved_model.exported_names",
           builder.getStrArrayAttr(object_names.GetExportedNames(node_id)));
     }
   }
   AdjustBoundInputArgTypes(module);
-  module.setAttr("tf_saved_model.semantics", builder.getUnitAttr());
+  module->setAttr("tf_saved_model.semantics", builder.getUnitAttr());
   SortSavedModelModule(module);
   MarkSavedModelFunctionVisibility(module);
   return Status::OK();
@@ -3448,7 +3448,7 @@ Status SavedModelSignatureDefImporterLite::ConvertInitializer(
 
   // Set the exported name of init function to an reserved name for
   // tf_saved_model.
-  init_func_op.setAttr(
+  init_func_op->setAttr(
       "tf_saved_model.exported_names",
       builder.getStrArrayAttr({absl::StrCat(
           "__tf_saved_model_session_initializer_", target_node_name)}));
@@ -3508,8 +3508,8 @@ Status SavedModelSignatureDefImporterLite::ConvertSignature(
       << sig_def_key << ".";
 
   // Use unique SignatureDef key as exported name.
-  func_op.setAttr("tf_saved_model.exported_names",
-                  builder.getStrArrayAttr({sig_def_key}));
+  func_op->setAttr("tf_saved_model.exported_names",
+                   builder.getStrArrayAttr({sig_def_key}));
 
   // Transfer input and output parameter names to index_path attributes.
   for (auto input_and_idx : llvm::enumerate(inputs)) {
@@ -3623,7 +3623,7 @@ SavedModelSignatureDefImporterLite::ConvertSignatures() {
   builder.create<mlir::tf_saved_model::SessionInitializerOp>(
       module_->getLoc(), builder.getArrayAttr(init_sym_refs));
 
-  module_->setAttr("tf_saved_model.semantics", builder.getUnitAttr());
+  (*module_)->setAttr("tf_saved_model.semantics", builder.getUnitAttr());
 
   SortSavedModelModule(*module_);
   MarkSavedModelFunctionVisibility(*module_);
@@ -3653,7 +3653,8 @@ class SavedModelSignatureDefImporter {
                             context, upgrade_legacy, /*import_restore=*/false));
 
     mlir::OpBuilder builder(module->getContext());
-    module->setAttr("tf_saved_model.under_construction", builder.getUnitAttr());
+    (*module)->setAttr("tf_saved_model.under_construction",
+                       builder.getUnitAttr());
     TF_RETURN_IF_ERROR(LiftVariables(bundle, *module));
     module->removeAttr("tf_saved_model.under_construction");
 
diff --git a/tensorflow/compiler/mlir/tensorflow/utils/translate_utils.cc b/tensorflow/compiler/mlir/tensorflow/utils/translate_utils.cc
index d7b9a5c2f45..075d33a348c 100644
--- a/tensorflow/compiler/mlir/tensorflow/utils/translate_utils.cc
+++ b/tensorflow/compiler/mlir/tensorflow/utils/translate_utils.cc
@@ -30,9 +30,9 @@ void PopulateTfVersions(mlir::ModuleOp module, const VersionDef& versions) {
       "bad_consumers",
       b.getI32ArrayAttr(llvm::ArrayRef<int32_t>(
           versions.bad_consumers().begin(), versions.bad_consumers().end())));
-  module.setAttr("tf.versions",
-                 b.getDictionaryAttr(llvm::ArrayRef<mlir::NamedAttribute>(
-                     {producer, min_consumer, bad_consumers})));
+  module->setAttr("tf.versions",
+                  b.getDictionaryAttr(llvm::ArrayRef<mlir::NamedAttribute>(
+                      {producer, min_consumer, bad_consumers})));
 }
 
 mlir::LogicalResult ExtractTfVersions(mlir::ModuleOp module,
diff --git a/tensorflow/compiler/mlir/tensorflow/utils/xla_sharding_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/xla_sharding_util.cc
index a3f8e833ae3..82939c9d600 100644
--- a/tensorflow/compiler/mlir/tensorflow/utils/xla_sharding_util.cc
+++ b/tensorflow/compiler/mlir/tensorflow/utils/xla_sharding_util.cc
@@ -92,8 +92,9 @@ mlir::LogicalResult CreateSplitOp(const int num_split,
   llvm::SmallVector<mlir::Type, 4> output_types(num_split, output_type);
   *split_op = builder->create<mlir::TF::SplitOp>(
       location, output_types, split_dimension_op.output(), src_input);
-  split_op->setAttr(kNumSplitAttr, builder->getIntegerAttr(
-                                       builder->getIntegerType(32), num_split));
+  (*split_op)->setAttr(
+      kNumSplitAttr,
+      builder->getIntegerAttr(builder->getIntegerType(32), num_split));
   return mlir::success();
 }
 
diff --git a/tensorflow/compiler/mlir/tfr/ir/tfr_ops.cc b/tensorflow/compiler/mlir/tfr/ir/tfr_ops.cc
index e1ef506ba1f..be01511510c 100644
--- a/tensorflow/compiler/mlir/tfr/ir/tfr_ops.cc
+++ b/tensorflow/compiler/mlir/tfr/ir/tfr_ops.cc
@@ -231,7 +231,7 @@ static LogicalResult Verify(TFRFuncOp func) {
   // Collect all the undefined attributes used in the inputs.
   llvm::SmallVector<StringAttr, 4> undefined_attrs;
   for (auto attr : input_used_attrs) {
-    if (!func.getAttr(attr.getValue())) {
+    if (!func->getAttr(attr.getValue())) {
       undefined_attrs.push_back(attr);
     }
   }
@@ -295,7 +295,7 @@ static LogicalResult Verify(TFRFuncOp func) {
 
   // Collect all the undefined attributes used in the outputs.
   for (auto attr : output_used_attrs) {
-    if (!func.getAttr(attr.getValue())) {
+    if (!func->getAttr(attr.getValue())) {
       undefined_attrs.push_back(attr);
     }
   }
diff --git a/tensorflow/compiler/mlir/tfr/passes/decompose.cc b/tensorflow/compiler/mlir/tfr/passes/decompose.cc
index 13d5f45e0ab..c532bc103a9 100644
--- a/tensorflow/compiler/mlir/tfr/passes/decompose.cc
+++ b/tensorflow/compiler/mlir/tfr/passes/decompose.cc
@@ -111,7 +111,7 @@ LogicalResult DecomposeTFOpsPass::RewriteUnregisteredTFOps() {
   FuncOp func = getFunction();
   SymbolTable table(external_tfr_module.hasValue()
                         ? *external_tfr_module
-                        : func.getParentOfType<ModuleOp>());
+                        : func->getParentOfType<ModuleOp>());
   OpBuilder builder(func);
   bool changed = false;
   func.walk([&table, &builder, &changed](Operation* op) {
@@ -244,7 +244,7 @@ LogicalResult DecomposeTFOpsPass::InlineTFRFuncCalls() {
   FuncOp func = getFunction();
   SymbolTable table(external_tfr_module.hasValue()
                         ? *external_tfr_module
-                        : func.getParentOfType<ModuleOp>());
+                        : func->getParentOfType<ModuleOp>());
 
   // The inliner only inlines the TFR call op.
   bool changed = false;
diff --git a/tensorflow/compiler/mlir/tfr/passes/raise_to_tf.cc b/tensorflow/compiler/mlir/tfr/passes/raise_to_tf.cc
index 7ffcd4c7b11..d3780a4ef26 100644
--- a/tensorflow/compiler/mlir/tfr/passes/raise_to_tf.cc
+++ b/tensorflow/compiler/mlir/tfr/passes/raise_to_tf.cc
@@ -450,7 +450,7 @@ void RaiseToTFOpsPass::runOnFunction() {
   MLIRContext* ctx = &getContext();
   SymbolTable table(external_tfr_module.hasValue()
                         ? *external_tfr_module
-                        : func.getParentOfType<ModuleOp>());
+                        : func->getParentOfType<ModuleOp>());
 
   OwningRewritePatternList patterns;
   patterns.insert<RewriteTFRCallOp>(ctx, table, materialize_derived_attrs);
diff --git a/tensorflow/compiler/mlir/tfr/utils/utils.cc b/tensorflow/compiler/mlir/tfr/utils/utils.cc
index 253a109358b..2dec56074af 100644
--- a/tensorflow/compiler/mlir/tfr/utils/utils.cc
+++ b/tensorflow/compiler/mlir/tfr/utils/utils.cc
@@ -142,7 +142,7 @@ LogicalResult CopyAllowedUnregisteredAttrs(Operation* src, CallOp dst,
 
     // Unregistered attribute.
     if (GetAllowedAttributes().contains(attr_name)) {
-      dst.setAttr(attr.first, attr.second);
+      dst->setAttr(attr.first, attr.second);
     } else {
       src->emitError("Denied unregistered attribute was found: " + attr_name);
       return failure();
diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc
index 2d2e7197d0e..18041f98c07 100644
--- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc
+++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc
@@ -4637,11 +4637,11 @@ class ConvertInfeedDequeueTupleOp
       if (sharding_proto.type() == ::xla::OpSharding::TUPLE) {
         *sharding_proto.add_tuple_shardings() =
             ::xla::sharding_builder::AssignDevice(0);
-        data_and_token.setAttr(
+        data_and_token->setAttr(
             kShardingAttr,
             rewriter.getStringAttr(sharding_proto.SerializeAsString()));
       } else {
-        data_and_token.setAttr(kShardingAttr, op._XlaShardingAttr());
+        data_and_token->setAttr(kShardingAttr, op._XlaShardingAttr());
       }
     }
 
@@ -5157,7 +5157,7 @@ class ConvertXlaShardingOp : public OpRewritePattern<TF::XlaShardingOp> {
         /*call_target_name=*/rewriter.getStringAttr("Sharding"),
         /*has_side_effect=*/rewriter.getBoolAttr(false),
         /*backend_config=*/rewriter.getStringAttr(""));
-    custom_call.setAttr(kShardingAttr, op._XlaShardingAttr());
+    custom_call->setAttr(kShardingAttr, op._XlaShardingAttr());
     rewriter.replaceOp(op, custom_call.getResult(0));
 
     return success();

From 30dc4e6e60048a34747b5d3e741930f2ff7ab147 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 14 Dec 2020 01:05:18 -0800
Subject: [PATCH 31/60] compat: Update forward compatibility horizon to
 2020-12-14

PiperOrigin-RevId: 347335063
Change-Id: Ic7017b1eac0b1d3ec0d9f458ed96160d73eae40d
---
 tensorflow/python/compat/compat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py
index d92c999c404..eccea00b95f 100644
--- a/tensorflow/python/compat/compat.py
+++ b/tensorflow/python/compat/compat.py
@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 12, 13)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 12, 14)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = "TF_FORWARD_COMPATIBILITY_DELTA_DAYS"
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 

From 2fb25f58c8299b37e336d4da044bc39561c8c751 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 14 Dec 2020 01:05:19 -0800
Subject: [PATCH 32/60] Update GraphDef version to 615.

PiperOrigin-RevId: 347335068
Change-Id: Ifbb624eda094f9dbea529f5f6ac78e36ce247d43
---
 tensorflow/core/public/version.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h
index fc3758e928d..8e5709160ca 100644
--- a/tensorflow/core/public/version.h
+++ b/tensorflow/core/public/version.h
@@ -108,7 +108,7 @@ limitations under the License.
 
 #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 614  // Updated: 2020/12/13
+#define TF_GRAPH_DEF_VERSION 615  // Updated: 2020/12/14
 
 // Checkpoint compatibility versions (the versions field in SavedSliceMeta).
 //

From 607161bb37cda7067df768e4b47efa95bafa4006 Mon Sep 17 00:00:00 2001
From: Jean-Baptiste Lespiau <jblespiau@google.com>
Date: Mon, 14 Dec 2020 02:01:00 -0800
Subject: [PATCH 33/60] Move part of jax_jit.cc to the header file.

PiperOrigin-RevId: 347341435
Change-Id: I900e9641df651b84910e39740acb92a5cebc383d
---
 tensorflow/compiler/xla/python/jax_jit.cc | 201 +++++++---------------
 tensorflow/compiler/xla/python/jax_jit.h  |  86 +++++++++
 2 files changed, 151 insertions(+), 136 deletions(-)

diff --git a/tensorflow/compiler/xla/python/jax_jit.cc b/tensorflow/compiler/xla/python/jax_jit.cc
index 7d216e0b4fe..0c624928d86 100644
--- a/tensorflow/compiler/xla/python/jax_jit.cc
+++ b/tensorflow/compiler/xla/python/jax_jit.cc
@@ -58,114 +58,39 @@ namespace py = pybind11;
 // TODO(phawkins): Add support for Tracers.
 // TODO(jblespiau): Use absl Status.
 
-namespace {
-
-thread_local bool disable_jit;
-void SetDisableJit(bool disable_jit_) { disable_jit = disable_jit_; }
-bool GetDisableJit() { return disable_jit; }
-
-// Describes the abstract shape and dtype of an argument.
-struct ArgSignature {
-  // This is the XLA dtype of the object.
-  xla::PrimitiveType dtype;
-  // JAX arguments can be of weak type, if and only if they are Python scalars
-  // or `DeviceArray` values such that `aval.weak_type` is true.
-  bool weak_type;
-  absl::InlinedVector<int64, 4> shape;
-  bool operator==(const ArgSignature& other) const {
-    return std::tie(dtype, weak_type, shape) ==
-           std::tie(other.dtype, other.weak_type, other.shape);
+std::string ArgSignature::DebugString() const {
+  std::string result = "";
+  if (weak_type) {
+    absl::StrAppend(&result, "weak_");
   }
-  bool operator!=(const ArgSignature& other) const { return !(*this == other); }
-
-  std::string DebugString() const {
-    std::string result = "";
-    if (weak_type) {
-      absl::StrAppend(&result, "weak_");
-    }
-    absl::StrAppend(&result, xla::PrimitiveType_Name(dtype));
-    absl::StrAppend(&result, "[", absl::StrJoin(shape, ","), "]");
-    return result;
-  }
-};
-
-template <typename H>
-H AbslHashValue(H h, const ArgSignature& s) {
-  h = H::combine(std::move(h), s.dtype);
-  h = H::combine_contiguous(std::move(h), s.shape.data(), s.shape.size());
-  return h;
+  absl::StrAppend(&result, xla::PrimitiveType_Name(dtype));
+  absl::StrAppend(&result, "[", absl::StrJoin(shape, ","), "]");
+  return result;
 }
 
-// The signature of Python jitted function call, partitioned into:
-// - dynamic positional arguments (i.e. positional args which are not static)
-// - static positional arguments (i.e. the args associated to static_argnums)
-// - keyword arguments
-// The CallSignature should unambiguously identify a function call, thus,
-// equality is based on:
-// (a) Same PyTree for all dynamic positional arguments and keyword arguments
-// (a) equality of the arguments and keyword arguments ArgSignature
-// (a) equality (delegated to Python) of the static arguments.
-struct CallSignature {
-  struct KwargEntry {
-    // To avoid comparing strings, we intern the kwargs strings.
-    // The compilation cache holds a reference to all the keys.
-    py::handle key;
-    PyTreeDef value_treedef;
-    bool operator==(const KwargEntry& other) const {
-      return key.ptr() == other.key.ptr() &&
-             value_treedef == other.value_treedef;
-    }
-    bool operator!=(const KwargEntry& other) const { return !(*this == other); }
-  };
-
-  // Only contains the arguments associated to `static_argnums`, sorted in the
-  // order of their argnum index.
-  std::vector<py::object> static_args;
-  // A PyTreeDef for each positional dynamic (i.e. not static) argument.
-  std::vector<PyTreeDef> dynamic_positional_args_treedef;
-  // Keyword arguments. Sorted by the keyword name.
-  std::vector<KwargEntry> keyword_args;
-  // Shape and dtype for both the dynamic positional arguments and the keyword
-  // arguments (sorted by keyword name).
-  std::vector<ArgSignature> dynamic_args_signatures;
-  PjRtDevice* device;
-
-  bool operator==(const CallSignature& other) const {
-    return std::tie(dynamic_positional_args_treedef, keyword_args,
-                    dynamic_args_signatures, device) ==
-               std::tie(other.dynamic_positional_args_treedef,
-                        other.keyword_args, other.dynamic_args_signatures,
-                        other.device) &&
-           // `==` on py:objects is the Python `is`. We need equal.
-           std::equal(
-               static_args.begin(), static_args.end(),
-               other.static_args.begin(), other.static_args.end(),
-               [](const py::object& a, const py::object& b) {
-                 try {
-                   return a.equal(b);
-                 } catch (const py::error_already_set& e) {
-                   throw std::invalid_argument(absl::StrCat(
-                       "static arguments should be comparable using __eq__."
-                       "The following error was raised when comparing two "
-                       "objects of types ",
-                       py::cast<std::string>(py::str(py::type::of(a))), " and ",
-                       py::cast<std::string>(py::str(py::type::of(b))),
-                       ". The error was:\n", e.what()));
-                 }
-               });
-  }
-  bool operator!=(const CallSignature& other) const {
-    return !(*this == other);
-  }
-
-  // To be used when we want to keep ownership of Python values referenced by
-  // the `CallSignature` (i.e. when we insert an entry).
-  void IncRef() const;
-  // The destructor of the cache should call this on all entries.
-  void DecRef() const;
-
-  std::string DebugString() const;
-};
+bool CallSignature::operator==(const CallSignature& other) const {
+  return std::tie(dynamic_positional_args_treedef, keyword_args,
+                  dynamic_args_signatures, device) ==
+             std::tie(other.dynamic_positional_args_treedef, other.keyword_args,
+                      other.dynamic_args_signatures, other.device) &&
+         // `==` on py::objects is the Python `is`. We need `equal()`.
+         std::equal(
+             static_args.begin(), static_args.end(), other.static_args.begin(),
+             other.static_args.end(),
+             [](const py::object& a, const py::object& b) {
+               try {
+                 return a.equal(b);
+               } catch (const py::error_already_set& e) {
+                 throw std::invalid_argument(absl::StrCat(
+                     "static arguments should be comparable using __eq__."
+                     "The following error was raised when comparing two "
+                     "objects of types ",
+                     py::cast<std::string>(py::str(py::type::of(a))), " and ",
+                     py::cast<std::string>(py::str(py::type::of(b))),
+                     ". The error was:\n", e.what()));
+               }
+             });
+}
 
 void CallSignature::IncRef() const {
   for (const auto& kw : keyword_args) {
@@ -179,38 +104,13 @@ void CallSignature::DecRef() const {
   }
 }
 
-template <typename H>
-H AbslHashValue(H h, const CallSignature::KwargEntry& kw) {
-  h = H::combine(std::move(h), kw.key.ptr(), kw.value_treedef);
-  return h;
-}
+namespace {
 
-template <typename H>
-H AbslHashValue(H h, const CallSignature& s) {
-  h = H::combine_contiguous(std::move(h),
-                            s.dynamic_positional_args_treedef.data(),
-                            s.dynamic_positional_args_treedef.size());
-  h = H::combine_contiguous(std::move(h), s.keyword_args.data(),
-                            s.keyword_args.size());
-  h = H::combine_contiguous(std::move(h), s.dynamic_args_signatures.data(),
-                            s.dynamic_args_signatures.size());
-  h = H::combine(std::move(h), s.device);
-  for (const auto& static_arg : s.static_args) {
-    ssize_t hash;
-    try {
-      hash = py::hash(static_arg);
-    } catch (const py::error_already_set& e) {
-      throw std::invalid_argument(absl::StrCat(
-          "Non-hashable static arguments are not supported. An error occured "
-          "while trying to hash an object of type ",
-          py::cast<std::string>(py::str(py::type::of(static_arg))), ", ",
-          py::cast<std::string>(py::str(static_arg)), ". The error was:\n",
-          e.what(), "\n"));
-    }
-    h = H::combine(std::move(h), hash);
-  }
-  return h;
-}
+thread_local bool disable_jit;
+void SetDisableJit(bool disable_jit_) { disable_jit = disable_jit_; }
+bool GetDisableJit() { return disable_jit; }
+
+}  // namespace
 
 std::string CallSignature::DebugString() const {
   std::vector<std::string> static_args_str;
@@ -248,6 +148,35 @@ std::string CallSignature::DebugString() const {
       absl::StrJoin(tree_def_str, " | "));
 }
 
+template <typename H>
+H AbslHashValue(H h, const CallSignature& s) {
+  h = H::combine_contiguous(std::move(h),
+                            s.dynamic_positional_args_treedef.data(),
+                            s.dynamic_positional_args_treedef.size());
+  h = H::combine_contiguous(std::move(h), s.keyword_args.data(),
+                            s.keyword_args.size());
+  h = H::combine_contiguous(std::move(h), s.dynamic_args_signatures.data(),
+                            s.dynamic_args_signatures.size());
+  h = H::combine(std::move(h), s.device);
+  for (const auto& static_arg : s.static_args) {
+    ssize_t hash;
+    try {
+      hash = py::hash(static_arg);
+    } catch (const py::error_already_set& e) {
+      throw std::invalid_argument(absl::StrCat(
+          "Non-hashable static arguments are not supported. An error occured "
+          "while trying to hash an object of type ",
+          py::cast<std::string>(py::str(py::type::of(static_arg))), ", ",
+          py::cast<std::string>(py::str(static_arg)), ". The error was:\n",
+          e.what(), "\n"));
+    }
+    h = H::combine(std::move(h), hash);
+  }
+  return h;
+}
+
+namespace {
+
 struct CacheEntry {
   std::shared_ptr<xla::PyExecutable> executable;
   PyTreeDef out_pytree_def;
diff --git a/tensorflow/compiler/xla/python/jax_jit.h b/tensorflow/compiler/xla/python/jax_jit.h
index 2b1603aac27..8cb81fdfbce 100644
--- a/tensorflow/compiler/xla/python/jax_jit.h
+++ b/tensorflow/compiler/xla/python/jax_jit.h
@@ -16,10 +16,96 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_PYTHON_JAX_JIT_H_
 #define TENSORFLOW_COMPILER_XLA_PYTHON_JAX_JIT_H_
 
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
 #include "pybind11/pybind11.h"
+#include "tensorflow/compiler/xla/pjrt/pjrt_client.h"
+#include "tensorflow/compiler/xla/python/pytree.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
 
 namespace xla {
 
+// Describes the abstract shape and dtype of an argument.
+struct ArgSignature {
+  // This is the XLA dtype of the object.
+  xla::PrimitiveType dtype;
+  // JAX arguments can be of weak type, if and only if they are Python scalars
+  // or `DeviceArray` values such that `aval.weak_type` is true.
+  bool weak_type;
+  absl::InlinedVector<int64, 4> shape;
+  bool operator==(const ArgSignature& other) const {
+    return std::tie(dtype, weak_type, shape) ==
+           std::tie(other.dtype, other.weak_type, other.shape);
+  }
+  bool operator!=(const ArgSignature& other) const { return !(*this == other); }
+  std::string DebugString() const;
+};
+
+template <typename H>
+H AbslHashValue(H h, const ArgSignature& s) {
+  h = H::combine(std::move(h), s.dtype);
+  h = H::combine_contiguous(std::move(h), s.shape.data(), s.shape.size());
+  return h;
+}
+
+// The signature of Python jitted function call, partitioned into:
+// - dynamic positional arguments (i.e. positional args which are not static)
+// - static positional arguments (i.e. the args associated to static_argnums)
+// - keyword arguments
+// The CallSignature should unambiguously identify a function call, thus,
+// equality is based on:
+// (a) Same PyTree for all dynamic positional arguments and keyword arguments
+// (b) equality of the arguments and keyword arguments ArgSignature
+// (c) equality (delegated to Python) of the static arguments.
+struct CallSignature {
+  struct KwargEntry {
+    // To avoid comparing strings, we intern the kwargs strings.
+    // The compilation cache holds a reference to all the keys.
+    pybind11::handle key;
+    PyTreeDef value_treedef;
+    bool operator==(const KwargEntry& other) const {
+      return key.ptr() == other.key.ptr() &&
+             value_treedef == other.value_treedef;
+    }
+    bool operator!=(const KwargEntry& other) const { return !(*this == other); }
+  };
+
+  // Only contains the arguments associated to `static_argnums`, sorted in the
+  // order of their argnum index.
+  std::vector<pybind11::object> static_args;
+  // A PyTreeDef for each positional dynamic (i.e. not static) argument.
+  std::vector<PyTreeDef> dynamic_positional_args_treedef;
+  // Keyword arguments. Sorted by the keyword name.
+  std::vector<KwargEntry> keyword_args;
+  // Shape and dtype for both the dynamic positional arguments and the keyword
+  // arguments (sorted by keyword name).
+  std::vector<ArgSignature> dynamic_args_signatures;
+  PjRtDevice* device;
+
+  bool operator==(const CallSignature& other) const;
+  bool operator!=(const CallSignature& other) const {
+    return !(*this == other);
+  }
+
+  // To be used when we want to keep ownership of Python values referenced by
+  // the `CallSignature` (i.e. when we insert an entry).
+  void IncRef() const;
+  // The destructor of the cache should call this on all entries.
+  void DecRef() const;
+
+  std::string DebugString() const;
+};
+
+template <typename H>
+H AbslHashValue(H h, const CallSignature::KwargEntry& kw) {
+  h = H::combine(std::move(h), kw.key.ptr(), kw.value_treedef);
+  return h;
+}
+
+template <typename H>
+H AbslHashValue(H h, const CallSignature& s);
+
+// The function to call in `xla.cc` to add the bindings for this module.
 void BuildJaxjitSubmodule(pybind11::module& m);
 
 }  // namespace xla

From 8600972d6a9eb5abe0741db9de5697583f683176 Mon Sep 17 00:00:00 2001
From: David Rim <davidrim@google.com>
Date: Mon, 14 Dec 2020 02:06:56 -0800
Subject: [PATCH 34/60] Avoid error with dequantize and extra quantize ops when
 combining post-training quantize with QAT and supported_ops =
 [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]

PiperOrigin-RevId: 347342275
Change-Id: If40e8f2ea52647d06a56483cc4726d2875d8dde4
---
 tensorflow/lite/tools/optimize/BUILD          |   1 +
 .../lite/tools/optimize/quantize_model.cc     |  47 ++++++++++++++
 .../tools/optimize/quantize_model_test.cc     |  58 ++++++++++++++++++
 tensorflow/lite/tools/optimize/test_util.cc   |   1 +
 tensorflow/lite/tools/optimize/test_util.h    |   3 +
 .../lite/tools/optimize/testdata/fc_qat.bin   | Bin 0 -> 9656 bytes
 6 files changed, 110 insertions(+)
 create mode 100644 tensorflow/lite/tools/optimize/testdata/fc_qat.bin

diff --git a/tensorflow/lite/tools/optimize/BUILD b/tensorflow/lite/tools/optimize/BUILD
index 88015d7634d..4eae02c3bd7 100644
--- a/tensorflow/lite/tools/optimize/BUILD
+++ b/tensorflow/lite/tools/optimize/BUILD
@@ -321,6 +321,7 @@ tf_cc_test(
         "//tensorflow/lite/tools/optimize:testdata/argmax.bin",
         "//tensorflow/lite/tools/optimize:testdata/concat.bin",
         "//tensorflow/lite/tools/optimize:testdata/fc.bin",
+        "//tensorflow/lite/tools/optimize:testdata/fc_qat.bin",
         "//tensorflow/lite/tools/optimize:testdata/lstm_calibrated.bin",
         "//tensorflow/lite/tools/optimize:testdata/lstm_calibrated2.bin",
         "//tensorflow/lite/tools/optimize:testdata/lstm_quantized.bin",
diff --git a/tensorflow/lite/tools/optimize/quantize_model.cc b/tensorflow/lite/tools/optimize/quantize_model.cc
index 713bafdc441..43e3d3ba001 100644
--- a/tensorflow/lite/tools/optimize/quantize_model.cc
+++ b/tensorflow/lite/tools/optimize/quantize_model.cc
@@ -218,6 +218,41 @@ bool TensorTypeChangeRequired(const TensorT* tensor, const TensorType& type) {
   return (int8check || int16check);
 }
 
+// Returns false only if the input's sole consumer is a QUANTIZE op whose output
+// type, scale and zero point match the input tensor's (no requantize needed).
+bool InputQuantizeRequired(const ModelT* model, const SubGraphT* subgraph,
+                           int32_t input_idx) {
+  std::vector<OperatorT*> quantize_ops;
+  for (size_t op_idx = 0; op_idx < subgraph->operators.size(); op_idx++) {
+    OperatorT* op = subgraph->operators[op_idx].get();
+    if (std::find(op->inputs.begin(), op->inputs.end(), input_idx) !=
+        op->inputs.end()) {
+      const BuiltinOperator op_code =
+          GetBuiltinCode(model->operator_codes[op->opcode_index].get());
+      if (op_code != BuiltinOperator_QUANTIZE) {
+        return true;
+      }
+      quantize_ops.push_back(op);
+    }
+  }
+  if (quantize_ops.size() == 1) {
+    const auto* tensor = subgraph->tensors[input_idx].get();
+    const auto* op = quantize_ops[0];
+    const int32_t output_idx = op->outputs[0];
+    const auto output_type = subgraph->tensors[output_idx]->type;
+    const float output_scale =
+        subgraph->tensors[output_idx]->quantization->scale[0];
+    const int64_t output_zero_point =
+        subgraph->tensors[output_idx]->quantization->zero_point[0];
+    if (output_type == tensor->type &&
+        output_scale == tensor->quantization->scale[0] &&
+        output_zero_point == tensor->quantization->zero_point[0]) {
+      return false;
+    }
+  }
+  return true;
+}
+
 // Sets the input type, adding a Leading Op node at the start of the model if
 // necessary.
 // Returns the new input tensor index.
@@ -258,6 +293,13 @@ int32_t SetInputType(ModelT* model, SubGraphT* subgraph,
           leading_op_name, tensor->shape, tensor->shape_signature, input_type,
           scale, zero_point + 128, &leading_op_input);
     }
+
+    // Check if quantize op already exists.
+    if (!InputQuantizeRequired(model, subgraph, tensor_idx)) {
+      subgraph->tensors[tensor_idx] = std::move(leading_op_input);
+      return tensor_idx;
+    }
+
     const int32_t leading_op_input_idx = subgraph->tensors.size();
     subgraph->tensors.push_back(std::move(leading_op_input));
 
@@ -963,6 +1005,11 @@ TfLiteStatus QuantizeWeightsInputOutput(
             EnumNameBuiltinOperator(op_code));
         quantization_not_supported = true;
       } else if (!property.quantizable && !allow_float) {
+        if (op_code == BuiltinOperator_DEQUANTIZE &&
+            std::find(subgraph->outputs.begin(), subgraph->outputs.end(),
+                      op->outputs[0]) != subgraph->outputs.end()) {
+          continue;
+        }
         TF_LITE_REPORT_ERROR(error_reporter,
                              "Quantization not yet supported for op: '%s'.\n",
                              EnumNameBuiltinOperator(op_code));
diff --git a/tensorflow/lite/tools/optimize/quantize_model_test.cc b/tensorflow/lite/tools/optimize/quantize_model_test.cc
index 9afd163efd2..92df9718e6d 100644
--- a/tensorflow/lite/tools/optimize/quantize_model_test.cc
+++ b/tensorflow/lite/tools/optimize/quantize_model_test.cc
@@ -1639,6 +1639,64 @@ TEST_F(QuantizeTransposeTest, VerifyTranspose) {
             transpose_output->quantization->zero_point[0]);
 }
 
+class QuantizeQatTest : public QuantizeModelTest {
+ protected:
+  QuantizeQatTest() {
+    input_model_ = ReadModel(internal::kQatModelWithFc);
+    readonly_model_ = input_model_->GetModel();
+    readonly_model_->UnPackTo(&model_);
+  }
+};
+
+TEST_F(QuantizeQatTest, VerifySingleQuantize) {
+  auto status = QuantizeModelAllOperators(
+      &builder_, &model_, TensorType_FLOAT32, TensorType_FLOAT32, false,
+      TensorType_INT8, &error_reporter_);
+  ASSERT_EQ(kTfLiteOk, status);
+
+  const auto& subgraph = model_.subgraphs[0];
+  auto op = subgraph->operators[0].get();
+  ASSERT_EQ(GetBuiltinCode(model_.operator_codes[op->opcode_index].get()),
+            BuiltinOperator_QUANTIZE);
+  op = subgraph->operators[1].get();
+  ASSERT_EQ(GetBuiltinCode(model_.operator_codes[op->opcode_index].get()),
+            BuiltinOperator_RESHAPE);
+  op = subgraph->operators[2].get();
+  ASSERT_EQ(GetBuiltinCode(model_.operator_codes[op->opcode_index].get()),
+            BuiltinOperator_FULLY_CONNECTED);
+
+  ASSERT_EQ(op->inputs.size(), 3);
+  ASSERT_EQ(op->outputs.size(), 1);
+
+  auto qat_graph = readonly_model_->subgraphs()->Get(0);
+  // Verify that the FC input and weights are int8 quantized.
+  ASSERT_EQ(qat_graph->tensors()->Get(op->inputs[0])->type(), TensorType_INT8);
+  EXPECT_EQ(subgraph->tensors[op->inputs[0]].get()->type, TensorType_INT8);
+  ASSERT_EQ(qat_graph->tensors()->Get(op->inputs[1])->type(), TensorType_INT8);
+  EXPECT_EQ(subgraph->tensors[op->inputs[1]].get()->type, TensorType_INT8);
+
+  // Verify that the FC bias is int32 quantized.
+  ASSERT_EQ(qat_graph->tensors()->Get(op->inputs[2])->type(), TensorType_INT32);
+  EXPECT_EQ(subgraph->tensors[op->inputs[2]].get()->type, TensorType_INT32);
+
+  // The output of FC should be quantized.
+  ASSERT_EQ(qat_graph->tensors()->Get(op->outputs[0])->type(), TensorType_INT8);
+  EXPECT_EQ(subgraph->tensors[op->outputs[0]].get()->type, TensorType_INT8);
+
+  // Check the operator codes and their versions.
+  EXPECT_EQ(model_.operator_codes.size(), 4);
+  EXPECT_EQ(GetBuiltinCode(model_.operator_codes[0].get()),
+            BuiltinOperator_QUANTIZE);
+  EXPECT_EQ(GetBuiltinCode(model_.operator_codes[1].get()),
+            BuiltinOperator_RESHAPE);
+  EXPECT_EQ(GetBuiltinCode(model_.operator_codes[2].get()),
+            BuiltinOperator_FULLY_CONNECTED);
+  EXPECT_EQ(GetBuiltinCode(model_.operator_codes[3].get()),
+            BuiltinOperator_DEQUANTIZE);
+  EXPECT_EQ(model_.operator_codes[1]->version, 1);
+  EXPECT_EQ(model_.operator_codes[2]->version, 4);
+}
+
 }  // namespace
 }  // namespace optimize
 }  // namespace tflite
diff --git a/tensorflow/lite/tools/optimize/test_util.cc b/tensorflow/lite/tools/optimize/test_util.cc
index 5565fc4d657..433a10b57f3 100644
--- a/tensorflow/lite/tools/optimize/test_util.cc
+++ b/tensorflow/lite/tools/optimize/test_util.cc
@@ -73,6 +73,7 @@ const char* kSvdfCalibrated = "svdf_calibrated.bin";
 const char* kSvdfQuantized = "svdf_quantized.bin";
 
 const char* kModelWithUnpack = "unpack.bin";
+const char* kQatModelWithFc = "fc_qat.bin";
 
 int FailOnErrorReporter::Report(const char* format, va_list args) {
   char buf[1024];
diff --git a/tensorflow/lite/tools/optimize/test_util.h b/tensorflow/lite/tools/optimize/test_util.h
index 4341a67d1ae..62297297d88 100644
--- a/tensorflow/lite/tools/optimize/test_util.h
+++ b/tensorflow/lite/tools/optimize/test_util.h
@@ -116,6 +116,9 @@ extern const char* kSvdfQuantized;
 // Test model with an unpack op.
 extern const char* kModelWithUnpack;
 
+// Test QAT model with fc op.
+extern const char* kQatModelWithFc;
+
 // An error reporter that fails on testing.
 class FailOnErrorReporter : public ErrorReporter {
  public:
diff --git a/tensorflow/lite/tools/optimize/testdata/fc_qat.bin b/tensorflow/lite/tools/optimize/testdata/fc_qat.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f121f7eb0dd59efe0838a1eb772af40d6d1bfd32
GIT binary patch
literal 9656
zcmb1PU|<Mw^D$;%5Mq#FkYEsD5Mbb8;9y{2U||qqU|^77U|{fIU|?7v&%m%jo`FF{
zm4Shofq{XMfq{XCfq{VoteOR^gA=TafkBvofgv|DFTSWWuOu@!HNGshs5mn}kAVwh
zmplW5k30i|3CJvY28K6s3=B`?7#K_#7#MUI7#LKbW_)_^|33=@1A_nq1B0QSp^2UW
z102ZQ`Tt+x&j0^n4<Tw6$T2Xmu=6l6e*4DxmzUw&?@!E(U;ch&`u_U|(_hA4Je=Qt
zF@5{`kMsBQhhLZ(ZZf`SeE9v-=PzFvzWw9<&&k2e_vY>I4_6MqczJ8*w#$cho;!2<
z#ItAY-#&A4F#i1Y^69nL&))ug`TWWKFE_5;xpC#ek2~MqF|vJOWxMq9-s7+TUVr%U
z{TJiEzaQUyd->ztgU6rW{bqgr>(ZI0_y4^9{r>~A5R(|2*st&3f8RQE_{qN?%<pbK
z`uXY2=SOc@*+n>cd1Qt8S(v%L?K*Jl>F=kvZ(V!x_5DLe_P>95m?T6cRQUf0@~}Q#
za^!_5<Ik5D-#`2OjQJM_H!qifq^<}%8yAQ0tBt$g{`~jj)w7qs-@N|D^_NwEnUPmo
zLPF>lx7goRJ0E=f{OQBnkDva2Vt@C8f&0(<AOB@U_?Tqm1y3FN^zZ-6S9gAVVP$*s
z>)-E>*Y_QL%g8DwFC`)I;_w@;_pjf-{J_rr;V&!amnR!Hth@Sxorg(QNrmP8&#w$$
zA3ggf#`>R|f%*HH70cG%`TqGovyzd{pBHbw-oJV2i!g_jB##*H+oP)&ZF}<eGn0sr
zl-&E{??1nJbo-q!gOHd6ANSMEixwPz_K$^uM^KTA@xjYCA0J))!zv~ut}4uQc<q9X
zx1Rpx=i+B%<#_dggX`tH7Yq_20^;&o|E?_Ce)09EUw?o8=44=aa{1GXA9t_6W?<(O
zmQnrv@WkmGjDP<8`p+r&?aRAYj2z7Gul@bY$j2|B!Oi~e#pj=YSbuzC{`~g&%QqkX
z3B0-f`Wx#HE@2Tx!GElO{xQ9N^_}JW*T)~8vV8ye>Cz7-K1LQ^VFf-R&i`MZKYRV<
z_n!~H-oE4i^Y7)|TOZhjg_xxU1l0uj|NZ;*`t6&yKR*0qVEo4Z?c#-3-x#>KL={xT
z6gj^$J-B=A;*)1Te?0un_5IbuFZbSj{QR1Uk%@`()rU8?4xc%G{=&UK9KX5${r!0R
z{N+>o4(-^pV(Hv<YiDiTe(m1Hn@>Lc{Kfg3{oS`856<u1aqz^^BioN2-M{blz1I)F
z{Qb+x$@Ycm%U{ON@1NYhbNkiR7dNjx`~LCWpEvKB{{Q>>jpg5Wj&HwybADlF;`sE9
zo%I_hFBju)9=2cK|NLj*=455yegB>D$NSG8`58Yvd;jg_dj^(2?A-qufBydSpZ&|%
z*Uv7$eD_m;OZ4r}Pd~2QyZQDL0~6C<#&2w2UtGO-?f%~D?_PZQ{_5ea%a4v<eg5bV
z^WVRmpFh9*`03e$pTD?xxrG0GWce-5#`)#e+2>5(zw$6KfB*C9?xim;nAsRuU;Tc^
z&aTKO`u5y?Hm)BZelR?HdH2DQ7w?}v{qXS4_20k$|K(zOyzR*^wr@XQKYelT$*H@y
zpa1y&`QFoKkNz<KfARR>)>p6CU;lgf`r(~R*RI}s{_Hy=H^cuwpPoLyd~W~72cLg`
zdH>?u)u*4XKEM3!r+}uK2>ZXMPw(wsv3}R}JKuO-e0})f_Vf48U%mRwZDg<U>&=Hp
z_xJ8za{Ae4wyzwIK0bK-;ML35cOMG{cu9SI`|QPw<CnHSe=hXr{rA5w@4tBd;`QBA
z*Ofyp#9zIBdjHFvcPF0QWoG>L>EEB*k3PM=ae31%!(hArUmst5a_i=u{a0_jVP^jG
z|L@&vZ$94IcKo%0xe?p7Q|B+-e0cNLg(t7R{^aHQfA!w$Cr=Mu{Ho(6{^{U}O=s?2
z+j-!^y%)^POgHbIxpd>kyN`D{G%eX4Zai}K%8Q!^ZXJ02@+<R)_YV%AzVYD8v#(s1
zX3Xa=oW1el-jgS99zXu{M&S0POM90c`SA7)gMfy@j~n;jeP;Q4_xY!XAKCbSzdCk$
z&-Ev-S^qHeN(eB1`_206E8~wJAGw&Bd4FBHa_P?RPd|9re=+>{^WndMz^|XbKEMC=
z<I^AhA1}VKNbquV{b2g~?Cm>&-@+2Y?3_;@e`aC)%*=ZCJ)@2YKNIW6>o?ARW)+g)
z{m=gK+Q(OKnScNO_3`OXS#9CJ%&+f0eEa#oxDqGlyQfbsJ@_NZ@c-}6ClC1<f4shV
z`Nq{V*B@T_&iv=w%O~%?|Kw(7{>Ap<{`I4qFCCvh`QYyDJLl~>boJVYFCV_JG5r1e
z;|uHOXHOoU+<o%uv9s51oWJ_u@$)zTU;SqN@%sxa@4t^MtRKF8dG_Vo^Plg&Jp26R
z!}q_R*#9v9{Koz3|6fjyFF(I>eP!ojWa9nB^^@f*H~X*O-@dW^{L9A9^yTBHkI!yj
zd-jG~lJ(x3+YjHf{N-liX8-c#=eM6<UcdeHpPTdBUnUl|pHH5jzx0!Xo$UwHZ;sE;
zzx@{B7S)v15D{VI{`d0Ht7q3Pef!VE`2WX`pTE9x@$gD3NU4eN$?)*9zJBrI;?{GY
zUcYDj@%{}H1D~j{w7d$BJg<V1q}Zn)uio4`f9=P&&kSFl|KSmol;LJ&5t0z%6aT~X
z?c?wJC)e(M#rWmPi{HFF5}e!|Oj7@t1z3Kve*gdP&!<}(ZeM=J@aEGOIYB;VR$+c=
z1{PlSU%y$He>47kF>~$JyI($j{42x5^@o+6?e`C69sy1sCLYEg|DX5ny#C_Pmv?{I
zxmX^2dimhP(|gZfvGe@<`TgUUdwVZE|M~OZ&F3#(U)g<h&*ojrR&0Iz^vPF#j;}x7
ze`fvqnu+P{gE!ZgFWt6(|NfN+&z`*h^6Nk5Z|@&{5)$Cz`10rZ+10BzAGv(t?!8BE
z9=yK!n(^)D2e1EV8St`yefHw&mWy}qJpA>Oo9)ZfH$Q%UWqA7d1GA2Wz}H`IxqdPJ
z{`rOJH#>tcFZ;hw??1i${OE(SuC>7b->(Fu*hN{s{NWLimE-4R;r{XC2h(336-Qf1
z)_cD=gyn^JIhm!zRmE6&nEwj={UIbOY#gl0`|iyLR!tcN5q4p2IY|{xCO!df23B5n
zO+yc5A@*-i7<q+6g~S9U<rJiN1en-aco}%bWHg=Bxc|KT@t#9aTu@LzfRE?bUj|N2
zVSY{-esxuIiT|Iz{rdZ#iA$7ISWNui_x~UNaEVCqipi_1>q~t8&Bh`u$-%`VE5pvo
z^zqlf&&=X-ViNLlk`iy2d4B!+{^7;<{|xLu{{8;_^2N{ZoU%f)JZxM-Z+RF$|G4?+
z%$Ym)FW-6n_SeU+-#@c|ea-rlRb1jf+YjbX_pk3fa%j<}jqA=`yK(*bt*a*=-FbTD
z6C=lG_Wz6@-ru@+<>sjkXXY$fxpT|jQ)iyud-$F6$KUV2KQl0V{{HXn^Y@>=-Fp4*
z@!gkqK7V-f?j;u&4>va_7XvfrAGZJhc^O}R`Tpbcmw*5Mec}4T$;8FU&dBze;pdxg
z@4vkI`TpC-PwzgweE0n6TZYfSdA@S|VB~!J_4n61=bpT{`{u#BN6)|9eR1p7m%rb?
z@bdiq`})~m_MiWm1!Sbel*Gj(**VxAUp#XAJtOZgZodCQV&byON<zxQO576c|9SpA
zesTW5;R~PG*#0pq%c+VBNs4lFiT!35;Q#XT$HUvV&Yj$M^B=q5H+~^e2~IH~X>J}d
zF#&cyhF||*z1e$q@tJQNe9z@1WTkk8IC#0)#KnZT#aNix-@m!Ce$JJT%%7h#>IzCR
zfBnhw>mM_lBpU-Sm*D?zS3CPIzhn9Nl1-D3^XKo+PtHC2B&4Ju!OJ5p`TuhB(wpBI
z-@THS|MBtdxx+iQ-+C>nEF>t!FTnTl>B>uYU;ljZSN`9#I~Pt~Ir`+GfS4>FFApcf
zqvscnUw!eB<<&Ko*H4ZfJ9hu`n?Kw_5?uUzU!Gjwedzk9k8i%cy8Y$h!`r8xy}I{<
zTY`s;m4WHUqf@7@p1%0t3)6oF{@-^m-g^A#$y*^#W-bPnKcC;edUEs9)n|<FIXQSg
zzkGD+=HsVdnb;W_o-+OAmSFz>@7b>FzyFBINeO*<d-vpxTOa@aWO()TCx?KT6t@`1
zo@-ASL=^Qj82&xGcV^q+3-?byX5kf+=jM?Ye7gV9gBL=w2I7i>?_ZzWf9B+&`#+fl
zWQBQ@#aQm{{`CC^8;h-yCKvCoXYVebzw??|Oi@snhg;z5wKbPt{rtvds3|MJCB*#h
z-Lu#4S-FK}Wx2TDe|@m=!zWgLF@0e<89}~(Uq8J5!uW@smqUpA?T?$MF8=5I%%rBo
z$Irpf@R#%7k8dn&T#Q10*}r_gxA)ms7A|gi6JrHAX?8vy9#+P`EMM6q7+*fWdG+c$
z&d)#pvq~zfD=Nqd$f!!n^YU{3|MuY1o4YTby#2@fm7U|m9|k3DMGIXuEe&A?hHpQ<
ze}DV!?RUn1+#Jk|AKv}{^x@^xZyzqde17x8qc{KFKl}NC<@>j<-~TfGd(ZIe`<v^J
zZr{3p<J#H%S8n`#`RCP7b{2Ndum3qXc=))!eC1|(^_JoNmw&9xfB&-m`_K62&)@It
zpWidRe)sReg~x|Z-M)6|(eqoE-@g3wne!{}FYYgIU%!5M|Kag<(^qfWcKp!kwf8UG
zdinUn50+mHKYspv|K`Jkvxk>&nKR?y(cO1We0t6L<HLt9e;L1iWx31t?d|*fx36D6
zedpx8+n=8Qm*f#t<6`>$o0sqD&%dnf-#@bac=+hi`}aJ|LYyoD0?ML4p1fq{djE!j
zkK;GTyVozCoIY{k^8M#;J~Qy^NdNi!>BooL?^ymbGrs@v;^EEf`wm^a_WJqTAAI7>
zOdlWqd;k9J@2~Iwef;(2@y!SO_8fWr^YcH(|NlO`c=wv&&5hG<KfZbU`OEJw?;aoC
z|MJVn-=E%oe|zcJm8WcPUf+EA_QNYq;lJD*Pq*LUWME+a_~+s2ONS4AWqEh!`lF{G
z8F}RSRTa*y`XwX#{oB`XPmk=ra_!^qU*})m{m#ZGDq(7+_U`OIBQeJR%)gjlJ-Yw$
z+qb`0o_=K(RaR9qH<A4Nm0M9xl3PZM<JG%wkA8mq_UN~WsD_G+p0u&bk6+v>x@syS
zT>t;veg5RnTmHYlC4>c(<kdyFnHZQgEe*tESeO~!+<Wlt2iwaR?_T`n)m9f{<7MSg
zG|^Mw=Vj&k{Oaz_hp*ZHuwHxloQ+M8;nxpF86^#EVS%4aOiv#_J@E1S2L{Hw51u^!
z#`Nyhx1Tav(t<xfy?g!n(VYk9zx`zX^YGQvdpB;LK6~KelaE|XTyI}|xbuAf@e^0R
zFmV6-^yuNqll%9dIDYHV{qH|MetZAv`NR7UpTB<a?FZBATMy3c+IZ~T(;Khv{QB|j
z^Y4#ee|`V-=+&FwOrO8sxqa^9^%wV^eSCNC>-RtZ*f<yjfB*jfjfIEd^T*rQcfEV{
z{_U@4uikw9`JF|QM@anB#}7aLfB*aK+si|j-+lT0{^ifNPyhV<`jeaO7yFl=??1d@
zXZiF0>7{d5PTjow;OfD1SI-|mapv@`+s|J<dhwC_3lHnp=Xa0o-ZpR3tfh@D^XK$0
zTfgnnoge@IGjlL9uy8Ve`tbMdy*n?iUB7wj@xAkpUVMM|gX8;GW?m-7Z;U^=xLN<O
zGJJUZ?aP0TZ-04z{Q36#|2M9`96xy({(b+-{O9fSOZT7LyM62K-G}%8vM_z+{`UJD
z+ncZd-#vf)^y=%&o7bJV^^WQ5`#TTrz5dV4%khVe_xGDSm+n0H{`%ANcb|U#<>%)X
zU|{&gz{&LM+sBtT9^Sn9?9r1?U!K0_{wc=F_nVbRM37712N&nJ7dtQAd-m+hU)J|;
zKK}nM#Kyob#3QC;EdP)B<KrX8?mhg&#{8F)pN-|~Uk(mQ4n|3FO9M$khF^~lF248Z
z9|s?sFw4K+zrO$F;barym9_Vm5`O#T_O5LYp1)=kmk|EV@aENvFYnpeg+y&bLNz46
z{(f}m=-n?&?1CJMLaZO}UcCG66C0<ZQ&EKp@8>U9PCkG5lS@uaK}|-8<<;XmS8hFJ
z`KugJ<j%zO<KoHJe<Z}kctoW&l{vn>x_0I8zP-D@syV9v`F8*D?N6Li|2TR1jf^G#
zy*hK@_LWmhrcBxLU*Yw;udhCHbF*<W@k+?)^D#WQa{J-?&$o`P+;xlTHuHxI&l#Ej
zzu@GOmXiMc<=&+ik3KRy{qW$*$>Z;tzhC%v_u8GeuXs3^_}@Rg`RLR4?;jcdeRz5M
z<g0%#?)~`m>C^d(4?etq{Orz?d+)xm|Nr-si|xVLFT5`wbN%4uetiDYp(~f}J$%Q-
zB>4Xy`(JLhGpFDE`gHFDn~1E;uRC|{UVHib69dzKb{+=)fBX;LJ>z`#;kOu{j3^fm
zGdt5CuHTF-g3SE9+!8_`Zv6W4>-W3gT+*D1BCMQ@9AEx2aSF3DG6}Q%{P~sr=j(TW
zdB6PQ=V6y%`On4vpON(!JEx4e3_J6iPoMwZ`N8q&_gBV$JpXuk`9wMYF>!N=33IY@
z{^Mfi{{EMP<LfsjUOrJtHX#84PL|(HjK7)x@qFj{!upZ%-H)#f-`M#i1qEd}n79~z
zF?{^XEcop!GXu*9hCeS~yk_QL7ZwoZV&@V0_wzNEAfMQ;->ly_c>cY;as9&O=P!T#
ze*g5vhgWYN{dx82`&-VhJYQM=e*XLG!`oX=zdU;X=*-bu7oL3l`QpoW7S^x4|GzW-
z{`{GP<JZ@}-`IXIGJpR3orm??|NlHcn7)5z<NEf8M?gSKMMTv|`ahe1q@<vbm=Gre
z_pdKp--UjN@Jc9%t7_P&%gabeNXhBw=t!w>fBC|}#rE}|z&|cQX<jitMo}RF0SRR#
z1sOGcu^&Hp{xZM)%go5c_vbg;pDzrb{&0y2DC(-H$w>bG%FOdq=+W;ljGvjA|2=v5
z=GCu1!kkhvf&vPH?0@<H|Nr~&@#puSKYqCP{OyMyU*G-z`uFqizwF<>edA(d`2FDe
zvnPKT-hO=f?9sOu_paZ5e)Gwb%g-L(`@s0?`>E#-o;?5l{n?LKci-H)baU&8^EYo?
z*uC@CqtAa|Uw(b}=dX{Ce?5Bt>G|EWTh<>u_xRM!y~_??|IYaG`o-7JezCv!@$v2N
zr)PGZICJ&x(^vQJuV43&<^S7f_x><)@^b$F!NK_K=+)bIuY6`@Q}}&r|5v83cdy=M
z<PrSM#V5-3@7k@8kDt8zDlB2B{_6CbfA5~%e8nimCcwzUCctouf#c2Rf1h}@EaV?w
z`OWy{;r(+=QY^f@qRJwln0~Rm{P9MBOG;7b$K977f4sP{?H`8_ue78J*H>Zw-;6JR
z|KevC;Qn;(>-Qhu@0|I>#w;qKY0bqZ%<=K-Po95_Y;3>2pMCM;!?WY--oEGMQ83cs
z<NWja<!?@Q_J0fvpP!$9{O$YaOQ%k~WD*jR(bf|D&B4tj$H~sh_WtdM7f;y!eK~pb
z+QauBg_I1`BzZ*Tc?B4mzO#J&eCgxcAIv|`?A?3w{--~(QiifB5~BQEV!wVrfB5pn
zgKsQsUml;?bL7^?&w{dY8j30s;(Y9kFTOr`^W-fn>;Fg3&MsJg?#A0^KmV}^{rUCv
z&D&QWZ(n`*?$3|kpIP3V+O}}><^5-`UVr}N+0zHlUwwRV^XQ$gFaLh!`tbeb<-I$W
zubQ=P)!yBwE?&93X#a^DXD)yI%JP-z%bN#pPwqXscH8+2bLTEuJZ0|UI~QMl{rUAD
z7yH*=Uq5~O^5piVyB9AX*t>V*u_L!W+<o))+y8$Y|Ns8v<oe3^`{(apUl@NreE8+r
z$1k5=zWK?*`uP_-)6aiwUw$z&a{c@N_b=P;Z!A1)?97a;KYy@(|NZ~xw_pFBd}IFk
z==uE@4}QOS`|Iudw;w)#Vfw+#%k=vn6XVD4kFVXieC5`IvwI)Cy7KP*uQy*lvNALM
z`S#__ix&^CZ#j8#{r3H5E}S@g`qt&gPoDf?|Noiy7t{Nvw@z$3vU%&Kb6dAAoinAc
z_tfQ6cR&1NV*kQ&_a5Wd&+nhVyY}ef!7~TWp5D4*%bq1Guf2N7z|O_|i-BF7Ur2z9
z;rXjikH0*9_VC@igZmHPc+2rqP@bKaTUtd*Nrv|~%Rf#bf&W}TUcbD4?9{7|d=hdT
zO5!2{!ZM0V>^zEsqA~(pzy3YDa_qp1{|e$tN-_eXoPU^vc|{czH1y;ZIN86yyt3)^
z8!jy!MJ;X?J`T?R48N5W9G%tmc%%ea-<;fY;v18gk+KrkPuBmxIhjB3GRs&gDhP`T
z{CjcW=>7NXf{M11vJ4!|3~wKu+I5FRQ=E_C4=dBR!|&g7e-V*a5EA;$`0M%g{e270
zeiUWr5D@vw_36*oKd*i=nuyCX|NQdm`H{`@mYw?ZPexoxM1)=V&)2K(SU&Txaed<Y
zcK^za4g1gC{q<E|TggCGmgn1>d$0a~{mj6{$oAvi{VPYWKK#hdDX*X^CC>2c)zzEt
zeti7Q$oA#^omW?mKYaFwf#d&w(Z7s8f4%<r;mha$EUe%DJU;&5%86U|UUKsO{`u|O
zucr_0AG!RV^*bvA)2ok<5AMBo^8Ql>)_)%!et7ik;njmX&fj_b{wwS4%O{TQI(hr@
z;}2iCINyH0e(LIxV;4?bdGPAb=TA>h+`aJp$+L&Ap8tKv^6$Zm>u0X-J#zZ$!%uAN
zj89J8d-d!4pRcdKfBpH6_2tXEmyVs@yY23yyBuG5zCY&X5td^A$Mo+17cTCv@7_MX
zaOCQa>$kuE`^oZ~Us%dWo>y2v;Ey;9*T0{CAHR5dZRgvk@0q`{eNj+UQPGf+5>r!D
z7FUs0Wa0Vr^Ub|iZ<rasef|9Y{ofCK>|*?U?7~{ghH|QM|9%NFec)sJ#l!vcH;XvC
z2p5+)r?{n@mXe{i)IUDKf4_b+{9<7L%Kh`_=dU~*-`N>|vVCX&_lukH$1lcz-&lYC
zWBvO1`=|GRzI}f3>fEc(ukOG9`sUH^7eBbZFfeib{mb_A->W-MkDflca^}*d6E~dN
zc=*bxD=$9tFmwI?_~pUphnF88ymEE_wG&s59zA#E+^g?DKfQSShllyg(@Q5VKK=3b
z-FsFRR$dOKU$1_=czgTXH}>D(I9@#8efh!1?{B}e@o)=>$#Ai=vi<z@@YQdwkBp!1
z-nsPh`m@iU82)hbadPnp|6%$4oAC$ZpI@K;ygYm9`P0Aue*XBz$nf(A6ZdBZUIrdU
zK4yk5+<!hDdGwZpU65OXnS<;9-{-%-vvG2AO0Wz6WD)rN;pCfte>mABg~Wtd{=dEX
z^y3c^W_}L--w*!&d-L`23r<csJ{1KKE^fA$x1Rs`%FWHgFZlN6k6*t(Ubx32#-pet
zBE%;o$@S^wt8d(*obpm)jN)7j&rf|~6cyzA$@k;0ptcYLE5rZaa^i{#S{y%F-@bbL
z_U(sHU%q~Q^jAWekC*BDR}P;4%naXuy!!j?(}Q!j-#ol^>A^i=DP1AS|BT<?-hFlM
z-ouyYUj2FX<^07<r|-Xf_L*B$TJry=S5F^2xO3*wou{{+{QCIf>gIE|KfP!8$0DW3
z#LW2W!QE%qo;<mI<<!%!k3Sqee)ldjznG+w2Ft%!zu&%m{rKtEx1ToMeEjPB{dH&F
z{AA{q<r3v){P^+r*Kco`zWx37e(Qy=Oh4}*diwY4e->dbrr!@<eE9zM7w?x(zj$7q
zyz!Ii^X<J)KK&N@{e|J(w@;rwzGh(h!@()a{P@7zzrR17zjXhjAlHw7zdrqBV*2yr
z-zPSP?|<3OUwFX$>*L8c_g{Qr{`-TGnO{bhnc*87>)(HWUO&2X``>q-cXw`{z4n2H
z{fDTWw49_gHy0Z_=a-N7-@at{^Y!zMJqM3Gd-wS#rwF?w4=*3D2>-WlpYA+(`}GIQ
zkC#_Y-Z*vc#_NZ-pM2%|#LD>Q#m$qCZv6Yg%J$>?w^xskpS^eE_|ct5md{+V{=l~V
zm)`zj`tkkC*MFS9UVs1m@7u*Y=ia=$d-B4aCtv=2diI`$_a6fz=Z_!XSpNLr{l&=o
z^BL<Gu3vwDGyM6>{PoA@pS=H=zp;P&^y>eoXAkeazWw3$=T~>1JbnM0neq2;ZdRTT
z&)@$1_u=u~Q>T}&-?Dqlo<kQNzIpha_1m|9fB!vx^ZoV9D~GqQo-=*cglU`h95{L4
z#KYI$KYjno%lzu+<H!5<FI{%!^pX8X)~-La=fv{4`_A2d@a6xPzfYc@*|KuawU<9X
zy}ft!^vP3~&L5xOwdl&zFJIohx_WNQj`Qa}y!|UIt|ThM&HVAz?ThP|F1`8g^SAQ{
zckjP^=Jn$*+`Q5z=K89FzdpUXxMlmci&wvVxN-RSwI?^fz4-f)flo=E?-%Qf&z~P$
z+qwDHotL)`9zFK%`{yrAzxkLr7@2;)Vfpg&)vaSYmv4Xe>G6q+7nvDE1cf<y`Cj~Z
z`Tx%!Ms9|$uMeL*{N~MvJ5Rq!32@1a33LB?_>PfFO;|==jQjPzJ-6Pl+&%w9LP1f0
zQ&FDjC4+#7w6>DGmWlw+#U;n?{{3|KkBp3{s4VNB?{9@<BviB%RaGVV9&cO!`0c+}
zm;Uh!igGgj`Sa!XcWyo{H7R~>j<?5F9=!XT;m_HBVoV$??9A+6-u!+nE+fIn`0d5p
zWAirN{{8F2)6X2gKCu1yBgFLk<$Fc}Ue3=iZ@oUS>gb7Q|6jiU@aoM^j$bVNEKE;+
zv5N7$`}Fh9)kCY!o%{Cf#ltIiZ-4#zl9B1-%Xfc%GkyE=@z=9UM-N}W_3+R43+K*X
zz483!z57?6eft0J)w{R9K3=<d`RTu}e?EV|aPivn+m~;iIeGfb*N2aPeSGun^~(>B
z*}1>HXJveF`_ZH4*PlMTb^F=F&tD(CeDUt-SH@o)-*5e7=J<T?#fyjU{(brP=GWI>
zFCIU8_3j7TKVgwikACuSzj}QB&HI<X{(buN`|*RfUp~G0@}E~wkdN#3-yh%q9s2&8
zM_iDJi~0SxH=jQ92}>v_E2;ha@R#!o<IhKbB%~BXIV5;Ec{uqbR8%#zEtMpHF}?rH
z%)`zn&Z8wREzQR-qo|@RE+wiUA@T1&!{2`#e>s2s`t#-g*B|egUj4lM_0z3;cmMOU
zFudUT!T$NeumAs#{fEp~YcMb{INbaH|F<3kLoH|?{rUg@!l21?CkBSlAOHX7sWUKW
zulWC;{oDWl9KZknXJBAs;9y{3U}FHy%7f<I85sWm|NmcrnSnv$_y7MozyJT|XJBCP
z_3;c+aL&&wOD!r%P0<6-(TgyE03XuKJ_`dM0|Ns{95na;gn@zK1p@=a8>rd2iJ5sI
z9iX}QH4F?46$}jExp@#j@Bjb*pp^k2r-J7H*`ezIK<;8-K-j^}fUpC!L;y6053-Mm
zfq~)1@BjZndO><X>j*&RvNAx{6fiJwL02EJfY%Cu>;dTosRv<@T96qa4B~^-gVcb+
z2DG*S*=-0lkhKahw}SizQul(HfuV(&f#DAm149iH1H%qR1_mBR28IG?IQabe{~xsW
zK>-w3AWwiaGB7Z^FfcHH%m#%&$WD_#kT3!H59FqG5OrV@#4icHWec*;GbJ^zB(tOv
zBmvS7k^|}Af@F^k0|NudoeT^NoF!=X%z@ei3lBYzLD29A*#Xj9oLX24vL!J`zpykh
zuOvPtHLo~T-#4+uw=~BZhpba(VzFaN3dlHQcP;z-|3Ao2pmiCbxB|JU1GGNlFRFh*
z`d}F3r-Y=7*3k?M3^;7V?-rlL;*zk$oYK@lkn{E360;$!@XV47-^@JU#EP)QqGE9P
zgTfA!{=fW1#1kkCKxql&);UPw2U2$j>TX#0HGm@UKO`+SHZ)j+0szC!X*r1{C8>G(
zL8-+Vi3O<`L6}*U8lRI`nOdapn3Dr`crn=JnN_Jc9S?OED6By4;`<K?XVCH%Q2GW*
zConKD^!$T_Gt9lnVLZj;lDQe~Fowpceo$&+N?2l1W@1uKs(*ogQEG91X;E@2BtL+{
z&)`2K>_BU7K<OHkA3<T~1I>@1AOe*UU<}Rspfmw;J1qP_?gH^aVF=2LC22W&;KY|%
zl?svrnF*2yn*kDlmJ`Czbwi-C2$au27?l1%{)dDG3j-)T<gob@7B9~EdBr6V|A6$0
zfLAbyL(345Wh~%w4rDgSOpw1oZUK1@#0I$y*^fNn6-}V>l?BWK<y&mw5ArK0pJwJ2
zl$OLB!q-E={0MS)2m=Fy1t`Bj%aafP|NjT&Bak~87#P@i7z7wVWj3hn0)-oF?Gz~d
bzx@CIUkv1Y1_m*Zo4CPjh?|PQY=$BLsJ({W

literal 0
HcmV?d00001


From dd3499e26c7dceaad5333ea0762903de242150b5 Mon Sep 17 00:00:00 2001
From: Chris Jones <cjfj@google.com>
Date: Mon, 14 Dec 2020 02:11:19 -0800
Subject: [PATCH 35/60] [XLA-GPU] NFC: Simplify NCCL clique cache.

PiperOrigin-RevId: 347342866
Change-Id: If65dab9bc10a4ed6f7dbf2255e7e57485aaceeef
---
 tensorflow/compiler/xla/service/gpu/BUILD     |  1 +
 .../xla/service/gpu/nccl_collective_thunk.cc  | 26 +++----------------
 .../xla/service/gpu/nccl_collective_thunk.h   | 18 ++++++++-----
 .../gpu/nccl_collective_thunk_dummy.cc        |  2 +-
 4 files changed, 17 insertions(+), 30 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD
index 696ac9646a5..0e8b0d3d228 100644
--- a/tensorflow/compiler/xla/service/gpu/BUILD
+++ b/tensorflow/compiler/xla/service/gpu/BUILD
@@ -457,6 +457,7 @@ tf_cuda_library(
         "@com_google_absl//absl/container:flat_hash_set",
         "@com_google_absl//absl/strings",
         "@com_google_absl//absl/strings:str_format",
+        "@com_google_absl//absl/synchronization",
         "//tensorflow/compiler/xla/service:collective_ops_utils",
         "//tensorflow/compiler/xla/service:global_device_id",
         "//tensorflow/compiler/xla/service:hlo",
diff --git a/tensorflow/compiler/xla/service/gpu/nccl_collective_thunk.cc b/tensorflow/compiler/xla/service/gpu/nccl_collective_thunk.cc
index 03d289ed54a..7174eefcfb9 100644
--- a/tensorflow/compiler/xla/service/gpu/nccl_collective_thunk.cc
+++ b/tensorflow/compiler/xla/service/gpu/nccl_collective_thunk.cc
@@ -24,12 +24,12 @@ limitations under the License.
 
 #include "absl/container/flat_hash_set.h"
 #include "absl/strings/str_format.h"
+#include "absl/synchronization/mutex.h"
 #include "tensorflow/compiler/xla/service/collective_ops_utils.h"
 #include "tensorflow/compiler/xla/service/global_device_id.h"
 #include "tensorflow/compiler/xla/service/gpu/nccl_utils.h"
 #include "tensorflow/compiler/xla/service/hlo_instructions.h"
 #include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/platform/mutex.h"
 #include "tensorflow/stream_executor/gpu/gpu_activation.h"
 
 namespace xla {
@@ -47,20 +47,6 @@ namespace gpu {
 //    GPUs are participating in the op, so we get or create a NcclClique
 //    containing those GPUs.
 //  - We perform the NCCL operation using the clique.
-//
-// Creating NCCL cliques is expensive, so we cache them.  Our policy is, a thunk
-// keeps alive all cliques it's ever used.  When the thunk is destroyed, it
-// releases its handle on the cliques, and cliques whose refcounts go to 0 are
-// destroyed.
-
-// Extra data stored in NcclCollectiveThunk that we didn't want to expose in the
-// header.  In particular, this stores the thunk's cache of all NcclCliques it's
-// ever used.  This causes those cliques to stay alive as long as the thunk
-// lives, which is how we avoid expensive reinitialization of NCCL cliques.
-struct NcclCollectiveConfig::AuxData {
-  tensorflow::mutex mu;
-  absl::flat_hash_set<std::shared_ptr<NcclClique>> cliques TF_GUARDED_BY(mu);
-};
 
 NcclCollectiveConfig::NcclCollectiveConfig() = default;
 NcclCollectiveConfig::NcclCollectiveConfig(NcclCollectiveConfig&&) = default;
@@ -87,7 +73,6 @@ NcclCollectiveConfig GetNcclCollectiveConfig(const HloInstruction* hlo,
     config.collective_op_kind = RendezvousKey::kCrossReplica;
     config.op_id = static_cast<int64>(hlo->GetModule()->unique_id());
   }
-  config.aux_data = std::make_unique<NcclCollectiveConfig::AuxData>();
   return config;
 }
 
@@ -137,12 +122,9 @@ Status NcclCollectiveThunk::ExecuteOnStream(const ExecuteParams& params) {
 
   TF_RETURN_IF_ERROR(RunNcclCollective(params, comm));
 
-  // Keep the clique we used alive for as long as this Thunk lives.  Creating
-  // new NCCL cliques is expensive, and this is how we avoid thrashing them.
-  {
-    tensorflow::mutex_lock lock(config().aux_data->mu);
-    config().aux_data->cliques.insert(std::move(locked_clique.clique));
-  }
+  // Keep the clique we used alive for as long as this thunk lives.
+  absl::MutexLock lock(&mu_);
+  cliques_.insert(std::move(locked_clique.clique));
   return Status::OK();
 }
 
diff --git a/tensorflow/compiler/xla/service/gpu/nccl_collective_thunk.h b/tensorflow/compiler/xla/service/gpu/nccl_collective_thunk.h
index 7f60c70c3bd..3343fc5aee8 100644
--- a/tensorflow/compiler/xla/service/gpu/nccl_collective_thunk.h
+++ b/tensorflow/compiler/xla/service/gpu/nccl_collective_thunk.h
@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_NCCL_COLLECTIVE_THUNK_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_NCCL_COLLECTIVE_THUNK_H_
 
+#include "absl/synchronization/mutex.h"
 #include "tensorflow/compiler/xla/service/collective_ops_utils.h"
 #include "tensorflow/compiler/xla/service/gpu/thunk.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
@@ -28,6 +29,8 @@ using ncclComm_t = ncclComm*;
 namespace xla {
 namespace gpu {
 
+struct NcclClique;
+
 struct NcclCollectiveConfig {
   NcclCollectiveConfig();
   NcclCollectiveConfig(NcclCollectiveConfig&&);
@@ -41,12 +44,6 @@ struct NcclCollectiveConfig {
   std::vector<ReplicaGroup> replica_groups;
   RendezvousKey::CollectiveOpKind collective_op_kind;
   int64 op_id;
-  // Extra data stored in NcclCollectiveConfig whose types we don't want exposed
-  // in the header file.  (This is mainly because the implementation of
-  // NcclCollectiveConfig is different depending on whether CUDA is enabled in
-  // the build, and we don't want to expose *that* mess in the header.)
-  struct AuxData;
-  std::unique_ptr<AuxData> aux_data;
 };
 
 NcclCollectiveConfig GetNcclCollectiveConfig(const HloInstruction* hlo,
@@ -65,12 +62,19 @@ class NcclCollectiveThunk : public Thunk {
   // error.
   static bool NcclIsEnabled();
 
-  Status ExecuteOnStream(const ExecuteParams& params) override;
+  Status ExecuteOnStream(const ExecuteParams& params) override
+      ABSL_LOCKS_EXCLUDED(mu_);
 
  protected:
   virtual Status RunNcclCollective(const ExecuteParams& params,
                                    ncclComm_t comm) = 0;
   virtual const NcclCollectiveConfig& config() const = 0;
+
+ private:
+  // Creating NCCL cliques is expensive, so we cache them.
+  absl::Mutex mu_;
+  absl::flat_hash_set<std::shared_ptr<NcclClique>> cliques_
+      ABSL_GUARDED_BY(mu_);
 };
 
 }  // namespace gpu
diff --git a/tensorflow/compiler/xla/service/gpu/nccl_collective_thunk_dummy.cc b/tensorflow/compiler/xla/service/gpu/nccl_collective_thunk_dummy.cc
index 0c49b2d690a..fc5ea04ca6a 100644
--- a/tensorflow/compiler/xla/service/gpu/nccl_collective_thunk_dummy.cc
+++ b/tensorflow/compiler/xla/service/gpu/nccl_collective_thunk_dummy.cc
@@ -19,7 +19,7 @@ limitations under the License.
 namespace xla {
 namespace gpu {
 
-struct NcclCollectiveConfig::AuxData {};
+struct NcclClique {};
 
 NcclCollectiveConfig::NcclCollectiveConfig() = default;
 NcclCollectiveConfig::NcclCollectiveConfig(NcclCollectiveConfig &&) = default;

From 3a002164777a5584f86165cc12b61cfe47f2ff29 Mon Sep 17 00:00:00 2001
From: Alexander Belyaev <pifon@google.com>
Date: Mon, 14 Dec 2020 05:46:09 -0800
Subject: [PATCH 36/60] [KERNEL_GEN] Switch the pipeline to Linalg-on-Tensors.

PiperOrigin-RevId: 347368063
Change-Id: I14d8f1bd3c072d4d0e9b79f75de515c9ff401393
---
 .../Dialect/mhlo/transforms/rewriters.h       |  5 ++
 .../mhlo/transforms/hlo_legalize_to_lhlo.cc   | 90 +++++++++++--------
 .../mlir/hlo/tests/hlo-legalize-to-lhlo.mlir  | 23 +++--
 .../mlir/tools/kernel_gen/kernel_creator.cc   | 51 +++++------
 .../mlir/tools/kernel_gen/tests/isinf.mlir    |  6 +-
 .../mlir/tools/kernel_gen/tests/tanh.mlir     |  8 +-
 .../tests/tf-legalize-to-lmhlo.mlir           |  2 +-
 .../tools/kernel_gen/transforms/bufferize.cc  | 24 +++--
 .../kernel_gen/transforms/bufferize_pass.cc   | 70 ++++++++++++---
 9 files changed, 182 insertions(+), 97 deletions(-)

diff --git a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h
index a2066df8233..1fb9ba6fc42 100644
--- a/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h
+++ b/tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h
@@ -52,6 +52,11 @@ void PopulateGatherToTorchIndexSelectPatterns(
 void PopulateMhloToStdPatterns(OwningRewritePatternList *patterns,
                                MLIRContext *ctx);
 
+// Collection of rewrite patterns for lowering of dynamic HLOs to LHLO dialect.
+void populateDynamicHLOToLHLOConversionPattern(
+    MLIRContext *context, BufferizeTypeConverter *converter,
+    OwningRewritePatternList *patterns, bool insert_copy = true);
+
 // Collection of rewrite patterns for lowering of HLO to LHLO dialect.
 void populateHLOToLHLOConversionPattern(MLIRContext *context,
                                         BufferizeTypeConverter *converter,
diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/hlo_legalize_to_lhlo.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/hlo_legalize_to_lhlo.cc
index 0d5e52c7239..822fa566dc9 100644
--- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/hlo_legalize_to_lhlo.cc
+++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/hlo_legalize_to_lhlo.cc
@@ -192,24 +192,56 @@ struct HloToLhloCustomCallOpConverter
   }
 };
 
-struct HloToLhloDynamicBroadcastInDimOpConverter
+// TODO(pifon): Consider inserting lhlo.copy as in
+// HloToLhloDynamicBroadcastInDimOpConverter.
+struct HloToLhloDynamicReshapeConverter
+    : public BaseOpConversion<mhlo::DynamicReshapeOp> {
+ public:
+  using BaseOpConversion<mhlo::DynamicReshapeOp>::BaseOpConversion;
+
+  LogicalResult matchAndRewrite(
+      mhlo::DynamicReshapeOp op, ArrayRef<Value> operands,
+      ConversionPatternRewriter& rewriter) const final {
+    Type result_type;
+    if (auto ranked_type = op.getType().dyn_cast<RankedTensorType>()) {
+      result_type =
+          MemRefType::get(ranked_type.getShape(), ranked_type.getElementType());
+    } else if (auto unranked_type =
+                   op.getType().dyn_cast<UnrankedTensorType>()) {
+      result_type = UnrankedMemRefType::get(unranked_type.getElementType(), 0);
+    } else {
+      return failure();
+    }
+    mhlo::DynamicReshapeOp::Adaptor adaptor(operands);
+    rewriter.replaceOpWithNewOp<MemRefReshapeOp>(
+        op, result_type, adaptor.operand(), adaptor.output_shape());
+    return success();
+  }
+};
+
+class HloToLhloDynamicBroadcastInDimOpConverter
     : public BaseOpConversion<mhlo::DynamicBroadcastInDimOp> {
  public:
-  using BaseOpConversion<mhlo::DynamicBroadcastInDimOp>::BaseOpConversion;
+  HloToLhloDynamicBroadcastInDimOpConverter(TypeConverter& converter,
+                                            MLIRContext* ctx,
+                                            bool insert_copy = true)
+      : BaseOpConversion<mhlo::DynamicBroadcastInDimOp>(converter, ctx),
+        insert_copy_(insert_copy) {}
 
   LogicalResult matchAndRewrite(
       mhlo::DynamicBroadcastInDimOp op, ArrayRef<Value> operands,
       ConversionPatternRewriter& rewriter) const final {
-    auto loc = op.getLoc();
-    Value resultBuffer = InsertDynamicAllocAndDealloc(
-        loc, op.getResult(), op.output_dimensions(), &rewriter);
+    Value result = InsertDynamicMemrefCastOp(op, operands.front(), &rewriter);
 
-    Value transformed_operand =
-        InsertDynamicMemrefCastOp(op, operands.front(), &rewriter);
-    rewriter.create<lmhlo::CopyOp>(loc, transformed_operand, resultBuffer);
-
-    rewriter.replaceOp(op, {resultBuffer});
+    if (insert_copy_) {
+      auto loc = op.getLoc();
+      Value result_buffer = InsertDynamicAllocAndDealloc(
+          loc, op.getResult(), op.output_dimensions(), &rewriter);
 
+      rewriter.create<lmhlo::CopyOp>(loc, result, result_buffer);
+      result = result_buffer;
+    }
+    rewriter.replaceOp(op, {result});
     return success();
   }
 
@@ -307,31 +339,10 @@ struct HloToLhloDynamicBroadcastInDimOpConverter
         static_strides, llvm::None, sizes, strides);
     return transformed_operand;
   }
-};
 
-struct HloToLhloDynamicReshapeConverter
-    : public BaseOpConversion<mhlo::DynamicReshapeOp> {
- public:
-  using BaseOpConversion<mhlo::DynamicReshapeOp>::BaseOpConversion;
-
-  LogicalResult matchAndRewrite(
-      mhlo::DynamicReshapeOp op, ArrayRef<Value> operands,
-      ConversionPatternRewriter& rewriter) const final {
-    Type result_type;
-    if (auto ranked_type = op.getType().dyn_cast<RankedTensorType>()) {
-      result_type =
-          MemRefType::get(ranked_type.getShape(), ranked_type.getElementType());
-    } else if (auto unranked_type =
-                   op.getType().dyn_cast<UnrankedTensorType>()) {
-      result_type = UnrankedMemRefType::get(unranked_type.getElementType(), 0);
-    } else {
-      return failure();
-    }
-    mhlo::DynamicReshapeOp::Adaptor adaptor(operands);
-    rewriter.replaceOpWithNewOp<MemRefReshapeOp>(
-        op, result_type, adaptor.operand(), adaptor.output_shape());
-    return success();
-  }
+  // If true, keep the copy semantics: allocate a buffer for the result and copy
+  // the memref cast into it; otherwise the memref cast itself is the result.
+  bool insert_copy_;
 };
 
 struct HloToLhloDotGeneralOpConverter
@@ -593,15 +604,22 @@ struct HloLegalizeToLhlo
 };
 }  // namespace
 
+void populateDynamicHLOToLHLOConversionPattern(
+    MLIRContext* context, BufferizeTypeConverter* converter,
+    OwningRewritePatternList* patterns, bool insert_copy) {
+  patterns->insert<HloToLhloDynamicBroadcastInDimOpConverter>(
+      *converter, context, insert_copy);
+  patterns->insert<HloToLhloDynamicReshapeConverter>(*converter, context);
+}
+
 void populateHLOToLHLOConversionPattern(MLIRContext* context,
                                         BufferizeTypeConverter* converter,
                                         OwningRewritePatternList* patterns) {
+  populateDynamicHLOToLHLOConversionPattern(context, converter, patterns);
   // clang-format off
   patterns->insert<
       HloToLhloCustomCallOpConverter,
       HloToLhloDotGeneralOpConverter,
-      HloToLhloDynamicBroadcastInDimOpConverter,
-      HloToLhloDynamicReshapeConverter,
       HloToLhloOpConverter<mhlo::AbsOp>,
       HloToLhloOpConverter<mhlo::AddOp>,
       HloToLhloOpConverter<mhlo::AndOp>,
diff --git a/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-to-lhlo.mlir b/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-to-lhlo.mlir
index 0c1ee243a04..5c05d5e946d 100644
--- a/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-to-lhlo.mlir
+++ b/tensorflow/compiler/mlir/hlo/tests/hlo-legalize-to-lhlo.mlir
@@ -170,24 +170,31 @@ func @dyn_broadcast(%operand: memref<?x?xf32>) -> index {
   return %rank : index
 }
 // CHECK: %[[SHAPE:.*]] = tensor_from_elements
+
 // CHECK: %[[C0:.*]] = constant 0 : index
-// CHECK: %[[EL0:.*]] = extract_element %[[SHAPE]]{{\[}}%[[C0]]] : tensor<3xi64>
-// CHECK: %[[SIZE_0:.*]] = index_cast %[[EL0]] : i64 to index
 // CHECK: %[[C1:.*]] = constant 1 : index
-// CHECK: %[[EL1:.*]] = extract_element %[[SHAPE]]{{\[}}%[[C1]]] : tensor<3xi64>
-// CHECK: %[[SIZE_1:.*]] = index_cast %[[EL1]] : i64 to index
-// CHECK: %[[C2:.*]] = constant 2 : index
-// CHECK: %[[EL2:.*]] = extract_element %[[SHAPE]]{{\[}}%[[C2]]] : tensor<3xi64>
-// CHECK: %[[SIZE_2:.*]] = index_cast %[[EL2]] : i64 to index
-// CHECK: %[[RESULT:.*]] = alloc(%[[SIZE_0]], %[[SIZE_1]], %[[SIZE_2]]) : memref<?x?x?xf32>
 // CHECK: %[[OPER_DIM_1:.*]] = dim %[[OPERAND]], %[[C1]] : memref<?x?xf32>
 // CHECK: %[[OP_STRIDE_0:.*]] = muli %[[C1]], %[[OPER_DIM_1]] : index
 // CHECK: %[[OPER_DIM_0:.*]] = dim %[[OPERAND]], %[[C0]] : memref<?x?xf32>
+
+// CHECK: %[[EL0:.*]] = extract_element %[[SHAPE]]{{\[}}%[[C0]]] : tensor<3xi64>
+// CHECK: %[[SIZE_0:.*]] = index_cast %[[EL0]] : i64 to index
+// CHECK: %[[EL1:.*]] = extract_element %[[SHAPE]]{{\[}}%[[C1]]] : tensor<3xi64>
+
+// CHECK: %[[SIZE_1:.*]] = index_cast %[[EL1]] : i64 to index
 // CHECK: %[[EXPAND_1:.*]] = cmpi "slt", %[[OPER_DIM_0]], %[[SIZE_1]] : index
 // CHECK: %[[STRIDE_1:.*]] = select %[[EXPAND_1]], %[[C0]], %[[OP_STRIDE_0]] : index
+
+// CHECK: %[[C2:.*]] = constant 2 : index
+// CHECK: %[[EL2:.*]] = extract_element %[[SHAPE]]{{\[}}%[[C2]]] : tensor<3xi64>
+// CHECK: %[[SIZE_2:.*]] = index_cast %[[EL2]] : i64 to index
 // CHECK: %[[EXPAND_2:.*]] = cmpi "slt", %[[OPER_DIM_1]], %[[SIZE_2]] : index
 // CHECK: %[[STRIDE_2:.*]] = select %[[EXPAND_2]], %[[C0]], %[[C1]] : index
+
 // CHECK: %[[TRANSFORMED_MEMREF:.*]] = memref_reinterpret_cast %[[OPERAND]] to offset: [0], sizes: {{\[}}%[[SIZE_0]], %[[SIZE_1]], %[[SIZE_2]]], strides: {{\[}}%[[C0]], %[[STRIDE_1]], %[[STRIDE_2]]]: memref<?x?xf32> to memref<?x?x?xf32, #map>
+
+// CHECK: %[[RESULT:.*]] = alloc(%[[SIZE_0]], %[[SIZE_1]], %[[SIZE_2]]) : memref<?x?x?xf32>
+
 // CHECK: "lmhlo.copy"(%[[TRANSFORMED_MEMREF]], %[[RESULT]]) : (memref<?x?x?xf32, #map>, memref<?x?x?xf32>) -> ()
 // CHECK: dealloc %[[RESULT]] : memref<?x?x?xf32>
 
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc b/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc
index ceac254500d..5221a87dfbd 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc
@@ -34,6 +34,7 @@ limitations under the License.
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"  // from @llvm-project
 #include "mlir/Dialect/LLVMIR/NVVMDialect.h"  // from @llvm-project
 #include "mlir/Dialect/Linalg/Passes.h"  // from @llvm-project
+#include "mlir/Dialect/Linalg/Transforms/Transforms.h"  // from @llvm-project
 #include "mlir/Dialect/SCF/Passes.h"  // from @llvm-project
 #include "mlir/Dialect/SCF/SCF.h"  // from @llvm-project
 #include "mlir/Dialect/SCF/Transforms.h"  // from @llvm-project
@@ -111,45 +112,42 @@ Status LowerTFtoGPU(mlir::ModuleOp module, bool gpu_binary_only,
     pm.addNestedPass<mlir::FuncOp>(mlir::createCanonicalizerPass());
   }
 
-  // Partial bufferization: Transforms inparticular HLO operation to their
-  // corresponding LHLO operations and converts the function signature. Leaves
-  // shape operations untouched.
-  pm.addPass(mlir::kernel_gen::transforms::CreateHloBufferizePass());
-  // Run CSE to ensure that loads and stores to the same location get recognized
-  // as such.
-  pm.addNestedPass<::mlir::FuncOp>(::mlir::createCSEPass());
-  // Forward stores to buffers to loads.
-  pm.addNestedPass<mlir::FuncOp>(xla::mlir_gpu::createStoreForwardingPass());
-
-  // Clean up the IR for further processing.
+  // Transform HLO operations to LinAlg.
+  pm.addNestedPass<mlir::FuncOp>(::mlir::mhlo::createLegalizeHloToLinalgPass());
   pm.addPass(mlir::createCanonicalizerPass());
   pm.addNestedPass<mlir::FuncOp>(mlir::createCSEPass());
   // We have to anticipate later unrolling in tiling to make sure that we get
   // the requested tiling after unrolling. Compute the new tiling here if
   // needed.
-  llvm::SmallVector<unsigned, 4> tiling_for_unrolling;
-  llvm::SmallVector<int64_t, 4> as_int64;
+  llvm::SmallVector<int64_t, 4> tiling_for_unrolling;
   tiling_for_unrolling.reserve(tile_sizes.size());
   for (auto pair : llvm::zip(tile_sizes, unroll_factors)) {
     tiling_for_unrolling.push_back(std::get<0>(pair) * std::get<1>(pair));
-    as_int64.push_back(std::get<1>(pair));
   }
   tiling_for_unrolling.append(
       tile_sizes.drop_front(unroll_factors.size()).begin(), tile_sizes.end());
-  // Transform LHLO operations to LinAlg.
-  pm.addNestedPass<mlir::FuncOp>(
-      ::mlir::lmhlo::createLegalizeLhloToLinalgPass());
+  // Fuse linalg operations.
+  pm.addNestedPass<mlir::FuncOp>(mlir::createLinalgFusionOfTensorOpsPass());
+
+  // Partial bufferization: Transforms in particular HLO and Linalg operations
+  // to their corresponding LHLO operations and converts the function signature.
+  // Leaves shape operations untouched.
+  //
+  // TODO(pifon): Rename the pass to CreateHloLinalgBufferizePass or bufferize
+  // in 2 steps: first Linalg, then Hlo. That would need refactoring of
+  // BufferizeTypeConverter.
+  pm.addPass(mlir::kernel_gen::transforms::CreateHloBufferizePass());
+  pm.addNestedPass<::mlir::FuncOp>(::mlir::createCanonicalizerPass());
+  pm.addNestedPass<::mlir::FuncOp>(::mlir::createCSEPass());
   if (!gpu_binary_only) {
     // Find candidates for buffer reuse. This is only successful if buffer size
     // equality can be determined based on `linalg.generic` operations.
     pm.addNestedPass<mlir::FuncOp>(
         mlir::kernel_gen::transforms::CreateBufferReusePass());
   }
-  // Fuse linalg operations.
-  pm.addNestedPass<mlir::FuncOp>(::mlir::lmhlo::createLhloFuseLinalgPass(
-      /*use_parallel_loops=*/true, tiling_for_unrolling));
-  // Transform the Linalg operations inside of the loop nest into parallel
-  // loops.
+  pm.addNestedPass<mlir::FuncOp>(
+      mlir::createLinalgTilingToParallelLoopsPass((tiling_for_unrolling)));
+  // Transform the Linalg ops inside of the loop nest into parallel loops.
   pm.addNestedPass<mlir::FuncOp>(
       ::mlir::createConvertLinalgToParallelLoopsPass());
   // Canonicalize the code to simplify index computations. This is needed so
@@ -162,14 +160,9 @@ Status LowerTFtoGPU(mlir::ModuleOp module, bool gpu_binary_only,
   // Run CSE to ensure that loads and stores to the same subview get
   // recognized as such.
   pm.addNestedPass<::mlir::FuncOp>(::mlir::createCSEPass());
-  // Forward stores to buffers to loads.
-  pm.addNestedPass<mlir::FuncOp>(xla::mlir_gpu::createStoreForwardingPass());
-  // Remove now unused temporary buffers.
-  pm.addNestedPass<mlir::FuncOp>(
-      xla::mlir_gpu::createDeadTempBufferRemovalPass());
   if (!unroll_factors.empty()) {
     pm.addNestedPass<mlir::FuncOp>(
-        ::mlir::createParallelLoopTilingPass(as_int64));
+        ::mlir::createParallelLoopTilingPass(tiling_for_unrolling));
   }
   // Some basic cleanup.
   pm.addNestedPass<::mlir::FuncOp>(::mlir::createCanonicalizerPass());
@@ -340,7 +333,7 @@ StatusOr<std::string> ExtractGpuBinary(mlir::ModuleOp module) {
     return InternalError("There should be exactly one GPU Module");
   }
   mlir::gpu::GPUModuleOp gpu_mod = *gpu_modules.begin();
-  auto blob = gpu_mod->getAttrOfType<mlir::StringAttr>(kGpuBinaryAttrName);
+  auto blob = gpu_mod.getAttrOfType<mlir::StringAttr>(kGpuBinaryAttrName);
   if (blob == nullptr) {
     return InternalError("No binary blob found in the module");
   }
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/isinf.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/isinf.mlir
index e5d124beee5..dce998b3255 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/tests/isinf.mlir
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/tests/isinf.mlir
@@ -1,4 +1,8 @@
-// RUN: tf-opt %s --test-tf-lower-tf --xla-legalize-tf | mlir-hlo-opt --transform-unranked-hlo | kernel-gen-opt -allow-unregistered-dialect --hlo-bufferize --canonicalize --shape-to-descriptors --canonicalize --final-bufferize | FileCheck %s
+// RUN: tf-opt %s --test-tf-lower-tf --xla-legalize-tf | \
+// RUN: mlir-hlo-opt --transform-unranked-hlo --hlo-legalize-to-linalg  | \
+// RUN: kernel-gen-opt -allow-unregistered-dialect --hlo-bufferize \
+// RUN: --canonicalize --shape-to-descriptors --canonicalize --final-bufferize \
+// RUN: | FileCheck %s
 
 // Test whether all shape computations required for isinf can be lowered to
 // the standard dialect, scf and descriptors.
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tanh.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/tanh.mlir
index 7cd4841cbac..26cf44095e4 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tanh.mlir
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/tests/tanh.mlir
@@ -1,4 +1,8 @@
-// RUN: tf-opt %s --xla-legalize-tf | mlir-hlo-opt --transform-unranked-hlo | kernel-gen-opt -allow-unregistered-dialect --hlo-bufferize --shape-to-descriptors --canonicalize --final-bufferize | FileCheck %s
+// RUN: tf-opt %s --xla-legalize-tf | \
+// RUN: mlir-hlo-opt --transform-unranked-hlo --hlo-legalize-to-linalg  | \
+// RUN: kernel-gen-opt -allow-unregistered-dialect --hlo-bufferize \
+// RUN: --canonicalize --shape-to-descriptors --canonicalize --final-bufferize \
+// RUN: | FileCheck %s
 
 // Test whether all shape computations required for tanh can be lowered to
 // the standard dialect, scf and descriptors. We check for a sparse pattern here,
@@ -13,7 +17,7 @@ func @tanh(%arg0: tensor<*xf32>) -> tensor<*xf32> {
   // CHECK: scf.for
   // CHECK-NOT: tensor_from_elements
   // CHECK: memref_reshape
-  // CHECK: lmhlo.tanh
+  // CHECK: linalg.generic
   // CHECK: memref_reshape
   %0 = "tf.Tanh"(%arg0) { } : (tensor<*xf32>) -> tensor<*xf32>
   return %0 : tensor<*xf32>
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf-legalize-to-lmhlo.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf-legalize-to-lmhlo.mlir
index 4a2b2da64b5..13b37f32c38 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf-legalize-to-lmhlo.mlir
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf-legalize-to-lmhlo.mlir
@@ -1,5 +1,5 @@
 // RUN: tf-opt %s --xla-legalize-tf='legalize-chlo=false' | \
-// RUN: mlir-hlo-opt --transform-unranked-hlo --chlo-legalize-to-hlo | \
+// RUN: mlir-hlo-opt --transform-unranked-hlo --chlo-legalize-to-hlo --hlo-legalize-to-linalg | \
 // RUN: kernel-gen-opt --hlo-bufferize --shape-to-descriptors --canonicalize --final-bufferize
 
 func @acos(%arg0: tensor<*xf32>) -> tensor<*xf32> {
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize.cc
index 9935f6b20b7..8ff10c9d38e 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize.cc
@@ -40,15 +40,25 @@ class BufferizeConstantOp : public OpConversionPattern<ConstantOp> {
     // We only need to bufferize tensor constants.
     Location loc = op.getLoc();
     auto result_type = op.getType().dyn_cast<RankedTensorType>();
-    if (!result_type || !result_type.hasStaticShape() ||
-        result_type.getRank() != 1)
+    if (!result_type || !result_type.hasStaticShape() ||
+        result_type.getRank() > 1)  // rank is read only after the null check
       return failure();
 
-    auto memref_type = MemRefType::get({result_type.getNumElements()},
-                                       result_type.getElementType());
+    auto memref_type =
+        MemRefType::get(result_type.getShape(), result_type.getElementType());
+    auto elements_attr = op.value().cast<DenseElementsAttr>();
+
+    if (result_type.getRank() == 0) {  // rank-0 tensor constant
+      Value buffer = rewriter.create<AllocOp>(loc, memref_type);
+      Value constant =
+          rewriter.create<ConstantOp>(loc, elements_attr.getValue({}));
+      rewriter.create<StoreOp>(loc, constant, buffer);
+      rewriter.replaceOp(op, {buffer});
+      return success();
+    }
+
     Value buffer = rewriter.create<AllocaOp>(loc, memref_type);
 
-    auto elements_attr = op.getValue().dyn_cast<DenseElementsAttr>();
     bool all_same_elems = elements_attr.isSplat();
     Value value;
     if (all_same_elems)
@@ -92,8 +102,8 @@ class BufferizeRankOp : public OpConversionPattern<RankOp> {
 void populateExtraStdBufferizePattern(MLIRContext *context,
                                       BufferizeTypeConverter *converter,
                                       OwningRewritePatternList *patterns) {
-  patterns->insert<BufferizeConstantOp, BufferizeDimOp,
-                   BufferizeRankOp>(*converter, context);
+  patterns->insert<BufferizeConstantOp, BufferizeDimOp, BufferizeRankOp>(
+      *converter, context);
 }
 
 }  // namespace transforms
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize_pass.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize_pass.cc
index 78d6f9687a4..a43d910e96e 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize_pass.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/bufferize_pass.cc
@@ -19,7 +19,10 @@ limitations under the License.
 #include <memory>
 
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/raw_ostream.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"  // from @llvm-project
+#include "mlir/Dialect/Linalg/IR/LinalgTypes.h"  // from @llvm-project
+#include "mlir/Dialect/Linalg/Transforms/Transforms.h"  // from @llvm-project
 #include "mlir/Dialect/SCF/SCF.h"  // from @llvm-project
 #include "mlir/Dialect/SCF/Transforms.h"  // from @llvm-project
 #include "mlir/Dialect/Shape/IR/Shape.h"  // from @llvm-project
@@ -51,6 +54,50 @@ namespace {
 #define GEN_PASS_CLASSES
 #include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/kernel_gen_passes.h.inc"
 
+// Loads the single memref input as a tensor of `type` via a TensorLoadOp.
+static Value materializeTensorLoad(OpBuilder& builder, TensorType type,
+                                   ValueRange inputs, Location loc) {
+  assert(inputs.size() == 1);
+  assert(inputs[0].getType().isa<BaseMemRefType>());
+  return builder.create<TensorLoadOp>(loc, type, inputs[0]);
+}
+
+/// A helper type converter class that automatically populates the relevant
+/// materializations and type conversions for bufferization.
+// TODO(pifon): Remove as soon as https://reviews.llvm.org/D93126 is landed.
+class CustomBufferizeTypeConverter : public BufferizeTypeConverter {
+ public:
+  CustomBufferizeTypeConverter() {
+    // Keep all types unchanged.
+    addConversion([](Type type) { return type; });
+    // Convert RankedTensorType to MemRefType.
+    addConversion([](RankedTensorType type) -> Type {
+      return MemRefType::get(type.getShape(), type.getElementType());
+    });
+    // Convert UnrankedTensorType to UnrankedMemRefType.
+    addConversion([](UnrankedTensorType type) -> Type {
+      return UnrankedMemRefType::get(type.getElementType(), 0);
+    });
+    addArgumentMaterialization(materializeTensorLoad);
+    addSourceMaterialization(materializeTensorLoad);
+    addTargetMaterialization([](OpBuilder& builder, BaseMemRefType type,
+                                ValueRange inputs, Location loc) -> Value {
+      assert(inputs.size() == 1);
+      // Target materialization is invoked if the new operand type does not
+      // match the expected type. A special case is when the new operand type is
+      // a memref with a specified layout, i.e. non-empty affine map.
+      // TODO(pifon): Change how target materialization is invoked in dialect
+      // conversion.
+      if (auto memref_type = inputs[0].getType().dyn_cast<MemRefType>()) {
+        assert(!memref_type.getAffineMaps().empty());
+        return inputs[0];
+      }
+      assert(inputs[0].getType().isa<TensorType>());
+      return builder.create<TensorToMemrefOp>(loc, type, inputs[0]);
+    });
+  }
+};
+
 struct HloBufferizePass : public HloBufferizePassBase<HloBufferizePass> {
   // TODO(b/173201243): Move to tablegen.
   void getDependentDialects(DialectRegistry& registry) const override {
@@ -62,13 +109,13 @@ struct HloBufferizePass : public HloBufferizePassBase<HloBufferizePass> {
     OwningRewritePatternList patterns;
     auto& context = getContext();
     ConversionTarget target(context);
-    target.addLegalDialect<lmhlo::LmhloDialect>();
-    target.addLegalDialect<StandardOpsDialect>();
+    target.addLegalDialect<lmhlo::LmhloDialect, StandardOpsDialect>();
     target.addIllegalDialect<mhlo::MhloDialect>();
 
-    BufferizeTypeConverter converter;
+    CustomBufferizeTypeConverter converter;
     // Configure bufferize pattern for functions and lhlo.
-    mhlo::populateHLOToLHLOConversionPattern(&context, &converter, &patterns);
+    mhlo::populateDynamicHLOToLHLOConversionPattern(
+        &context, &converter, &patterns, /*insert_copy=*/false);
     populateFuncOpTypeConversionPattern(patterns, &context, converter);
     populateCallOpTypeConversionPattern(patterns, &context, converter);
     populateBranchOpInterfaceAndReturnOpTypeConversionPattern(
@@ -76,6 +123,7 @@ struct HloBufferizePass : public HloBufferizePassBase<HloBufferizePass> {
 
     // Configure legality and structural patterns.
     populateBufferizeMaterializationLegality(target);
+    linalg::populateLinalgBufferizePatterns(&context, converter, patterns);
     populateShapeStructuralTypeConversionsAndLegality(&context, converter,
                                                       patterns, target);
     scf::populateSCFStructuralTypeConversionsAndLegality(&context, converter,
@@ -87,8 +135,9 @@ struct HloBufferizePass : public HloBufferizePassBase<HloBufferizePass> {
       return converter.isLegal(inputs) && converter.isLegal(results) &&
              converter.isLegal(&op.getBody());
     });
-    target.addDynamicallyLegalOp<CallOp, ReturnOp>(
-        [&converter](Operation* op) { return converter.isLegal(op); });
+    auto isLegalOp = [&](Operation* op) { return converter.isLegal(op); };
+    target.addDynamicallyLegalDialect<linalg::LinalgDialect>(isLegalOp);
+    target.addDynamicallyLegalOp<CallOp, ReturnOp>(isLegalOp);
 
     if (failed(applyPartialConversion(getOperation(), target,
                                       std::move(patterns))))
@@ -109,19 +158,14 @@ struct FinalBufferizePass : public FinalBufferizePassBase<FinalBufferizePass> {
     ConversionTarget target(context);
     target.addLegalDialect<scf::SCFDialect, StandardOpsDialect,
                            tf_framework::TFFrameworkDialect, AffineDialect,
-                           shape::ShapeDialect, lmhlo::LmhloDialect>();
+                           shape::ShapeDialect, lmhlo::LmhloDialect,
+                           linalg::LinalgDialect>();
     target.addLegalOp<FuncOp, ModuleOp, ModuleTerminatorOp>();
 
     target.addIllegalDialect<mhlo::MhloDialect>();
     target.addIllegalOp<DynamicTensorFromElementsOp, ExtractElementOp,
                         TensorFromElementsOp, TensorCastOp, TensorLoadOp,
                         TensorToMemrefOp>();
-    // Certain operations are no longer legal on tensors but otherwise are.
-    target.addDynamicallyLegalOp<ConstantOp, SelectOp>([&](Operation* op) {
-      return llvm::none_of(op->getResultTypes(),
-                           [](Type t) { return t.isa<TensorType>(); });
-    });
-
     BufferizeTypeConverter converter;
     auto typesAreLegal = [&converter](Operation* op) {
       return converter.isLegal(op->getOperandTypes()) &&

From 9618f238aad1d0547b7e96519e089ab0ab38cab1 Mon Sep 17 00:00:00 2001
From: Stephan Herhut <herhut@google.com>
Date: Mon, 14 Dec 2020 05:47:34 -0800
Subject: [PATCH 37/60] Enable MLIR generated unranked abs and tanh kernel by
 default.

These replace the MLIR generated ranked abs and tanh kernels.

PiperOrigin-RevId: 347368201
Change-Id: I38ef068238fc5d88ee7b80cba902ce96e30c3478
---
 tensorflow/core/kernels/mlir_generated/BUILD | 81 +++++++-------------
 1 file changed, 27 insertions(+), 54 deletions(-)

diff --git a/tensorflow/core/kernels/mlir_generated/BUILD b/tensorflow/core/kernels/mlir_generated/BUILD
index 7632da50f9c..caca0208675 100644
--- a/tensorflow/core/kernels/mlir_generated/BUILD
+++ b/tensorflow/core/kernels/mlir_generated/BUILD
@@ -78,20 +78,6 @@ filegroup(
     compatible_with = get_compatible_with_cloud(),
 )
 
-filegroup(
-    name = "unary_kernel_srcs",
-    srcs = if_mlir_unranked_kernels_enabled(
-        if_false = [
-            "cwise_op_gpu_abs.cc",
-            "cwise_op_gpu_base.cc",
-            "cwise_op_gpu_base.h",
-            "cwise_op_gpu_tanh.cc",
-        ],
-        if_true = [":unary_unranked_kernel_srcs"],
-    ),
-    compatible_with = get_compatible_with_cloud(),
-)
-
 cc_library(
     name = "unranked_op_gpu_base",
     srcs = ["unranked_op_gpu_base.cc"],
@@ -111,49 +97,36 @@ cc_library(
 
 tf_kernel_library(
     name = "cwise_unary_op",
-    srcs = [":unary_kernel_srcs"],
+    srcs = [":unary_unranked_kernel_srcs"],
     tags = [
         "manual",
     ],
-    deps = if_mlir_unranked_kernels_enabled(
-        if_false = [
-            ":abs_kernels",
-            ":tanh_kernels",
-            "@com_google_absl//absl/strings",
-            "@com_google_absl//absl/synchronization",
-            "@com_google_absl//absl/types:span",
-            "//third_party/eigen3",
-            "//tensorflow/core:framework",
-            "//tensorflow/core:lib",
-            "//tensorflow/core/platform:stream_executor",
-        ],
-        if_true = [
-            # Technically we only need to depend on the kernel libraries for the
-            # unranked kernels which are enabled by default. But this would
-            # make our BUILD target structure uglier. We already need to make
-            # sure that those targets can be built, so it should not hurt to
-            # link them in even if they are currently not needed yet.
-            ":abs_unranked_kernels",
-            ":ceil_unranked_kernels",
-            ":conj_unranked_kernels",
-            ":cos_unranked_kernels",
-            ":exp_unranked_kernels",
-            ":floor_unranked_kernels",
-            ":imag_unranked_kernels",
-            ":is_inf_unranked_kernels",
-            ":log_unranked_kernels",
-            ":logical_not_unranked_kernels",
-            ":neg_unranked_kernels",
-            ":real_unranked_kernels",
-            ":rsqrt_unranked_kernels",
-            ":sign_unranked_kernels",
-            ":sin_unranked_kernels",
-            ":sqrt_unranked_kernels",
-            ":tanh_unranked_kernels",
-            ":unranked_op_gpu_base",
-            "//third_party/eigen3",
-        ],
-    ),
+    deps = [
+        # Technically we only need to depend on the kernel libraries for the
+        # unranked kernels which are enabled by default. But this would
+        # make our BUILD target structure uglier. We already need to make
+        # sure that those targets can be built, so it should not hurt to
+        # link them in even if they are currently not needed yet.
+        ":abs_unranked_kernels",
+        ":ceil_unranked_kernels",
+        ":conj_unranked_kernels",
+        ":cos_unranked_kernels",
+        ":exp_unranked_kernels",
+        ":floor_unranked_kernels",
+        ":imag_unranked_kernels",
+        ":is_inf_unranked_kernels",
+        ":log_unranked_kernels",
+        ":logical_not_unranked_kernels",
+        ":neg_unranked_kernels",
+        ":real_unranked_kernels",
+        ":rsqrt_unranked_kernels",
+        ":sign_unranked_kernels",
+        ":sin_unranked_kernels",
+        ":sqrt_unranked_kernels",
+        ":tanh_unranked_kernels",
+        ":unranked_op_gpu_base",
+        "//third_party/eigen3",
+    ],
 )
 
 tf_kernel_library(

From fbcb024d7f107700a35226cd83f7ef25a476036e Mon Sep 17 00:00:00 2001
From: Alexander Belyaev <pifon@google.com>
Date: Mon, 14 Dec 2020 07:17:20 -0800
Subject: [PATCH 38/60] [KERNEL_GEN] Remove any trace of ranked MLIR kernels.

PiperOrigin-RevId: 347379437
Change-Id: Ia840f07c6e01d3cbe19059fb02dc676fd0b67136
---
 .../compiler/mlir/tools/kernel_gen/BUILD      |  23 --
 .../mlir/tools/kernel_gen/kernel_creator.cc   |  63 ++---
 .../mlir/tools/kernel_gen/kernel_creator.h    |   6 +-
 .../kernel_gen/tests/tf_to_gpu_binary/BUILD   |  17 --
 .../tests/tf_to_gpu_binary/abs.mlir           |   6 -
 .../tests/tf_to_gpu_binary/ceil.mlir          |   6 -
 .../tests/tf_to_gpu_binary/tanh.mlir          |   5 -
 .../mlir/tools/kernel_gen/tf_to_gpu_binary.cc |  96 --------
 .../mlir/tools/kernel_gen/tf_to_kernel.cc     |   6 +-
 .../mlir/tools/kernel_gen/transforms/BUILD    |   4 -
 .../transforms/materialize_broadcasts_pass.cc |  61 -----
 .../mlir/tools/kernel_gen/transforms/passes.h |   6 -
 .../tools/kernel_gen/transforms/passes.td     |  10 -
 .../transforms/unfuse_batch_norm_pass.cc      |  45 ----
 tensorflow/core/kernels/mlir_generated/BUILD  |  24 --
 .../kernels/mlir_generated/build_defs.bzl     | 232 +-----------------
 .../mlir_generated/cwise_op_gpu_abs.cc        |  40 ---
 .../mlir_generated/cwise_op_gpu_base.cc       | 129 ----------
 .../mlir_generated/cwise_op_gpu_base.h        |  76 ------
 .../mlir_generated/cwise_op_gpu_tanh.cc       |  36 ---
 20 files changed, 36 insertions(+), 855 deletions(-)
 delete mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/BUILD
 delete mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/abs.mlir
 delete mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/ceil.mlir
 delete mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/tanh.mlir
 delete mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/tf_to_gpu_binary.cc
 delete mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/transforms/materialize_broadcasts_pass.cc
 delete mode 100644 tensorflow/compiler/mlir/tools/kernel_gen/transforms/unfuse_batch_norm_pass.cc
 delete mode 100644 tensorflow/core/kernels/mlir_generated/cwise_op_gpu_abs.cc
 delete mode 100644 tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.cc
 delete mode 100644 tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.h
 delete mode 100644 tensorflow/core/kernels/mlir_generated/cwise_op_gpu_tanh.cc

diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/BUILD
index 4cfb216b532..731e882ea25 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/BUILD
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/BUILD
@@ -86,29 +86,6 @@ cc_library(
     ],
 )
 
-tf_cc_binary(
-    name = "tf_to_gpu_binary",
-    srcs = [
-        "crash_handler.h",
-        "tf_to_gpu_binary.cc",
-    ],
-    visibility = [
-        "//tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary:__pkg__",
-        "//tensorflow/core/kernels/mlir_generated:__pkg__",
-    ],
-    deps = [
-        ":kernel_creator",
-        "//tensorflow/compiler/mlir:init_mlir",
-        "//tensorflow/compiler/mlir/tensorflow",
-        "//tensorflow/core:lib",
-        "//tensorflow/core/platform",
-        "//tensorflow/stream_executor/lib",
-        "@com_google_absl//absl/strings",
-        "@llvm-project//llvm:Support",
-        "@llvm-project//mlir:Pass",
-    ],
-)
-
 tf_cc_binary(
     name = "tf_to_kernel",
     srcs = ["tf_to_kernel.cc"],
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc b/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc
index 5221a87dfbd..192ef4f9cce 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.cc
@@ -90,27 +90,17 @@ struct RemoveUnusedTensorToMemrefOperations
 };
 }  // end anonymous namespace
 
-Status LowerTFtoGPU(mlir::ModuleOp module, bool gpu_binary_only,
-                    llvm::ArrayRef<uint32_t> tile_sizes,
+Status LowerTFtoGPU(mlir::ModuleOp module, llvm::ArrayRef<uint32_t> tile_sizes,
                     llvm::ArrayRef<uint32_t> unroll_factors,
                     bool embed_memref_prints) {
   mlir::PassManager pm(module.getContext());
   applyTensorflowAndCLOptions(pm);
 
-  if (gpu_binary_only) {
-    pm.addNestedPass<mlir::FuncOp>(mlir::mhlo::createLegalizeTFPass(
-        /*allow_partial_conversion=*/false, /*legalize_chlo=*/true));
-    pm.addNestedPass<mlir::FuncOp>(
-        mlir::kernel_gen::transforms::CreateMaterializeBroadcastsPass());
-    pm.addNestedPass<mlir::FuncOp>(
-        mlir::kernel_gen::transforms::CreateUnfuseBatchNormPass());
-  } else {
-    pm.addNestedPass<mlir::FuncOp>(mlir::mhlo::createLegalizeTFPass(
-        /*allow_partial_conversion=*/false, /*legalize_chlo=*/false));
-    pm.addNestedPass<mlir::FuncOp>(mlir::createTransformUnrankedHloPass());
-    pm.addNestedPass<mlir::FuncOp>(mlir::mhlo::createChloLegalizeToHloPass());
-    pm.addNestedPass<mlir::FuncOp>(mlir::createCanonicalizerPass());
-  }
+  pm.addNestedPass<mlir::FuncOp>(mlir::mhlo::createLegalizeTFPass(
+      /*allow_partial_conversion=*/false, /*legalize_chlo=*/false));
+  pm.addNestedPass<mlir::FuncOp>(mlir::createTransformUnrankedHloPass());
+  pm.addNestedPass<mlir::FuncOp>(mlir::mhlo::createChloLegalizeToHloPass());
+  pm.addNestedPass<mlir::FuncOp>(mlir::createCanonicalizerPass());
 
   // Transform HLO operations to LinAlg.
   pm.addNestedPass<mlir::FuncOp>(::mlir::mhlo::createLegalizeHloToLinalgPass());
@@ -139,12 +129,10 @@ Status LowerTFtoGPU(mlir::ModuleOp module, bool gpu_binary_only,
   pm.addPass(mlir::kernel_gen::transforms::CreateHloBufferizePass());
   pm.addNestedPass<::mlir::FuncOp>(::mlir::createCanonicalizerPass());
   pm.addNestedPass<::mlir::FuncOp>(::mlir::createCSEPass());
-  if (!gpu_binary_only) {
-    // Find candidates for buffer reuse. This is only successful if buffer size
-    // equality can be determined based on `linalg.generic` operations.
-    pm.addNestedPass<mlir::FuncOp>(
-        mlir::kernel_gen::transforms::CreateBufferReusePass());
-  }
+  // Find candidates for buffer reuse. This is only successful if buffer size
+  // equality can be determined based on `linalg.generic` operations.
+  pm.addNestedPass<mlir::FuncOp>(
+      mlir::kernel_gen::transforms::CreateBufferReusePass());
   pm.addNestedPass<mlir::FuncOp>(
       mlir::createLinalgTilingToParallelLoopsPass((tiling_for_unrolling)));
   // Transform the Linalg ops inside of the loop nest into parallel loops.
@@ -188,15 +176,13 @@ Status LowerTFtoGPU(mlir::ModuleOp module, bool gpu_binary_only,
       std::make_unique<RemoveUnusedTensorToMemrefOperations>());
   pm.addPass(mlir::createCanonicalizerPass());
   pm.addNestedPass<mlir::FuncOp>(mlir::createCSEPass());
-  if (!gpu_binary_only) {
-    // Before inserting more allocs, map the ones we already have to the
-    // tf runtime. That ensures that all allocations for the actual computation
-    // end up on the device, whereas allocations for shape computation and host
-    // side things remain on the host.
-    // Longer term, this should be handled by proper device placement.
-    pm.addPass(mlir::kernel_gen::tf_framework::
-                   CreateEmbedTFFrameworkFunctionAndAllocPass());
-  }
+  // Before inserting more allocs, map the ones we already have to the
+  // tf runtime. That ensures that all allocations for the actual computation
+  // end up on the device, whereas allocations for shape computation and host
+  // side things remain on the host.
+  // Longer term, this should be handled by proper device placement.
+  pm.addPass(mlir::kernel_gen::tf_framework::
+                 CreateEmbedTFFrameworkFunctionAndAllocPass());
   pm.addPass(mlir::kernel_gen::transforms::CreateFinalBufferizePass());
   pm.addNestedPass<mlir::FuncOp>(mlir::createPromoteBuffersToStackPass(64));
   // TODO(herhut): Depends on https://bugs.llvm.org/show_bug.cgi?id=48385.
@@ -223,11 +209,6 @@ Status LowerTFtoGPU(mlir::ModuleOp module, bool gpu_binary_only,
   // Take launches to launches with kernels.
   pm.addPass(::mlir::createGpuKernelOutliningPass());
 
-  if (gpu_binary_only) {
-    // Make kernel signature deterministic so that we can call it externally.
-    pm.addNestedPass<::mlir::FuncOp>(
-        xla::mlir_gpu::createRewriteKernelSignaturePass());
-  }
   pm.addPass(::mlir::createLowerAffinePass());
   // Constraints are removed as late as possible and before lowering to CFG.
   pm.addNestedPass<::mlir::FuncOp>(::mlir::createConvertShapeConstraintsPass());
@@ -295,7 +276,7 @@ Status LowerHostSideToFinalForm(mlir::ModuleOp module) {
 }  // namespace
 
 StatusOr<mlir::OwningModuleRef> GenerateKernelForTfCode(
-    mlir::MLIRContext& context, llvm::StringRef tf_code, bool gpu_binary_only,
+    mlir::MLIRContext& context, llvm::StringRef tf_code,
     llvm::ArrayRef<std::string> architectures,
     llvm::ArrayRef<uint32_t> tile_sizes,
     llvm::ArrayRef<uint32_t> unroll_factors, bool embed_memref_prints,
@@ -304,8 +285,8 @@ StatusOr<mlir::OwningModuleRef> GenerateKernelForTfCode(
   mlir::RegisterAllTensorFlowDialects(registry);
   registry.insert<mlir::chlo::HloClientDialect, mlir::mhlo::MhloDialect>();
   mlir::OwningModuleRef module = mlir::parseSourceString(tf_code, &context);
-  TF_RETURN_IF_ERROR(LowerTFtoGPU(module.get(), gpu_binary_only, tile_sizes,
-                                  unroll_factors, embed_memref_prints));
+  TF_RETURN_IF_ERROR(LowerTFtoGPU(module.get(), tile_sizes, unroll_factors,
+                                  embed_memref_prints));
 #if !defined(TENSORFLOW_USE_ROCM) && !defined(GOOGLE_CUDA)
   return InternalError(
       "Neither TENSORFLOW_USE_ROCM nor GOOGLE_CUDA are defined."
@@ -321,9 +302,7 @@ StatusOr<mlir::OwningModuleRef> GenerateKernelForTfCode(
   TF_RETURN_IF_ERROR(GenerateDeviceCode(module.get(), kGpuBinaryAttrName,
                                         architectures, generate_fatbin,
                                         print_ptx));
-  if (!gpu_binary_only) {
-    TF_RETURN_IF_ERROR(LowerHostSideToFinalForm(module.get()));
-  }
+  TF_RETURN_IF_ERROR(LowerHostSideToFinalForm(module.get()));
   return module;
 }
 
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.h b/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.h
index 33be8ae9ef2..ac8ce845713 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.h
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.h
@@ -33,11 +33,9 @@ limitations under the License.
 namespace tensorflow {
 namespace kernel_gen {
 
-// Converts TF code to LLVM/NVVM. If `gpu_binary_only` is true, then the
-// conversion stops after gpu_binary blob is generated. If `gpu_binary_only` is
-// false, lowers the host side to LLVM Dialect.
+// Converts TF code to LLVM/NVVM. Lowers the host side to LLVM Dialect.
 xla::StatusOr<mlir::OwningModuleRef> GenerateKernelForTfCode(
-    mlir::MLIRContext& context, llvm::StringRef tf_code, bool gpu_binary_only,
+    mlir::MLIRContext& context, llvm::StringRef tf_code,
     llvm::ArrayRef<std::string> architectures = {"sm_75"},
     llvm::ArrayRef<uint32_t> tile_sizes = {16, 64},
     llvm::ArrayRef<uint32_t> unroll_factors = {},
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/BUILD
deleted file mode 100644
index 6aef5c05fe9..00000000000
--- a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/BUILD
+++ /dev/null
@@ -1,17 +0,0 @@
-load("//tensorflow/compiler/mlir:glob_lit_test.bzl", "glob_lit_tests")
-
-package(licenses = ["notice"])
-
-glob_lit_tests(
-    data = [
-        "//tensorflow/compiler/mlir/tools/kernel_gen:tf_to_gpu_binary",
-        "@llvm-project//mlir:run_lit.sh",
-    ],
-    default_tags = [
-        # We need access to the CUDA SDK.
-        "gpu",
-        "no_rocm",
-    ],
-    driver = "//tensorflow/compiler/mlir:run_lit.sh",
-    test_file_exts = ["mlir"],
-)
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/abs.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/abs.mlir
deleted file mode 100644
index 51773093564..00000000000
--- a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/abs.mlir
+++ /dev/null
@@ -1,6 +0,0 @@
-// RUN: tf_to_gpu_binary --input=%s --output=%t --unroll_factors=4 --tile_sizes=256 --arch=sm_70
-func @abs(%arg0: tensor<?xf16>) -> tensor<?xf16> attributes {tf_entry} {
-  %0 = "tf.Abs"(%arg0) { }
-    : (tensor<?xf16>) -> tensor<?xf16>
-  return %0 : tensor<?xf16>
-}
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/ceil.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/ceil.mlir
deleted file mode 100644
index bb505809abe..00000000000
--- a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/ceil.mlir
+++ /dev/null
@@ -1,6 +0,0 @@
-// RUN: tf_to_gpu_binary --input=%s --output=%t --unroll_factors=4 --tile_sizes=256 --arch=sm_70
-func @ceil(%arg0: tensor<?xf64>) -> tensor<?xf64> attributes {tf_entry} {
-  %0 = "tf.Ceil"(%arg0) { }
-    : (tensor<?xf64>) -> tensor<?xf64>
-  return %0 : tensor<?xf64>
-}
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/tanh.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/tanh.mlir
deleted file mode 100644
index fa88fc76c90..00000000000
--- a/tensorflow/compiler/mlir/tools/kernel_gen/tests/tf_to_gpu_binary/tanh.mlir
+++ /dev/null
@@ -1,5 +0,0 @@
-// RUN: tf_to_gpu_binary --input=%s --output=%t --unroll_factors=4 --tile_sizes=256 --arch=sm_70
-func @tanh(%arg0: tensor<?xf32>) -> tensor<?xf32> attributes {tf_entry} {
-  %0 = "tf.Tanh"(%arg0) : (tensor<?xf32>) -> tensor<?xf32>
-  return %0 : tensor<?xf32>
-}
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_gpu_binary.cc b/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_gpu_binary.cc
deleted file mode 100644
index 6f1de7dc1bc..00000000000
--- a/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_gpu_binary.cc
+++ /dev/null
@@ -1,96 +0,0 @@
-// Copyright 2020 The TensorFlow Runtime Authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//===- tf_to_gpu_binary.cc --------------------------------------*- C++ -*-===//
-//
-// This file implements the entry point to compile a tf op to a gpu binary
-//
-//===----------------------------------------------------------------------===//
-#include <string>
-#include <utility>
-#include <vector>
-
-#include "absl/strings/string_view.h"
-#include "llvm/Support/CommandLine.h"
-#include "mlir/Pass/PassManager.h"  // from @llvm-project
-#include "tensorflow/compiler/mlir/init_mlir.h"
-#include "tensorflow/compiler/mlir/tools/kernel_gen/crash_handler.h"
-#include "tensorflow/compiler/mlir/tools/kernel_gen/kernel_creator.h"
-#include "tensorflow/core/platform/env.h"
-#include "tensorflow/core/platform/logging.h"
-#include "tensorflow/stream_executor/lib/statusor.h"
-
-namespace tensorflow {
-namespace kernel_gen {
-namespace {
-
-xla::Status Run(llvm::StringRef input_file, llvm::StringRef output_file,
-                std::string architecture, llvm::ArrayRef<uint32_t> tile_sizes,
-                llvm::ArrayRef<uint32_t> unroll_factors) {
-  // Read TF code.
-  std::string tf_code;
-  TF_RETURN_IF_ERROR(
-      ReadFileToString(Env::Default(), input_file.str(), &tf_code));
-  // Compile.
-  mlir::MLIRContext context;
-  TF_ASSIGN_OR_RETURN(
-      mlir::OwningModuleRef module,
-      GenerateKernelForTfCode(context, tf_code, /*gpu_binary_only=*/true,
-                              architecture, tile_sizes, unroll_factors,
-                              /*embed_memref_prints=*/false,
-                              /*generate_fatbin=*/false));
-  // Extract gpu_binary.
-  TF_ASSIGN_OR_RETURN(std::string gpu_binary, ExtractGpuBinary(*module));
-
-  // Write gpu_binary blob.
-  TF_RETURN_IF_ERROR(
-      WriteStringToFile(Env::Default(), output_file.str(), gpu_binary));
-  return xla::Status::OK();
-}
-
-}  // namespace
-}  // namespace kernel_gen
-}  // namespace tensorflow
-
-int main(int argc, char** argv) {
-  tensorflow::kernel_gen::SetCrashReportMessage();
-  llvm::cl::opt<std::string> input_file("input", llvm::cl::desc("input file"),
-                                        llvm::cl::value_desc("filename"),
-                                        llvm::cl::init("foo.mlir"));
-  llvm::cl::opt<std::string> output_file(
-      "output", llvm::cl::desc("output file"), llvm::cl::value_desc("filename"),
-      llvm::cl::init("foo.bin"));
-  llvm::cl::opt<std::string> architecture(
-      "arch", llvm::cl::desc("target architecture (e.g. sm_50)"),
-      llvm::cl::init("sm_50"));
-  llvm::cl::list<uint32_t> tile_sizes(
-      "tile_sizes", llvm::cl::desc("tile sizes to use"), llvm::cl::ZeroOrMore,
-      llvm::cl::CommaSeparated);
-  llvm::cl::list<uint32_t> unroll_factors(
-      "unroll_factors",
-      llvm::cl::desc("factors to unroll by, separated by commas"),
-      llvm::cl::ZeroOrMore, llvm::cl::CommaSeparated);
-
-  tensorflow::InitMlir y(&argc, &argv);
-  mlir::registerPassManagerCLOptions();
-  llvm::cl::ParseCommandLineOptions(argc, argv, "TF op GPU kernel generator\n");
-
-  auto status = tensorflow::kernel_gen::Run(
-      input_file, output_file, architecture, tile_sizes, unroll_factors);
-  if (!status.ok()) {
-    LOG(ERROR) << status;
-    return 1;
-  }
-  return 0;
-}
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_kernel.cc b/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_kernel.cc
index a62a4136b4e..e0ad2349e89 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_kernel.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/tf_to_kernel.cc
@@ -115,9 +115,9 @@ xla::Status Run(llvm::StringRef input_file, llvm::StringRef output_file,
   mlir::MLIRContext context;
   TF_ASSIGN_OR_RETURN(
       mlir::OwningModuleRef module,
-      GenerateKernelForTfCode(context, tf_code, /*gpu_binary_only=*/false,
-                              architectures, tile_sizes, unroll_factors,
-                              embed_memref_prints, /*generate_fatbin=*/true,
+      GenerateKernelForTfCode(context, tf_code, architectures, tile_sizes,
+                              unroll_factors, embed_memref_prints,
+                              /*generate_fatbin=*/true,
                               /*print_ptx=*/print_ptx));
   // Get binary.
   TF_ASSIGN_OR_RETURN(std::string binary, EmitToBinary(*module));
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD
index 4fa6f025cc3..a1648745e44 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/BUILD
@@ -77,20 +77,16 @@ cc_library(
         "embed_memref_prints.cc",
         "embed_tf_framework_pass.cc",
         "gpu_kernel_to_blob_pass.cc",
-        "materialize_broadcasts_pass.cc",
         "parallel_loops_to_sequential.cc",
         "same_shape_propagation.cc",
         "shape_to_descriptors_pass.cc",
         "tensorflow_abi_knowledge_propagation.cc",
         "tf_kernel_to_llvm_pass.cc",
-        "unfuse_batch_norm_pass.cc",
     ],
     hdrs = ["passes.h"],
     copts = if_cuda_is_configured(["-DGOOGLE_CUDA=1"]) + if_rocm_is_configured(["-DTENSORFLOW_USE_ROCM=1"]),
     deps = [
         "@llvm-project//mlir:Affine",
-        "//tensorflow/compiler/mlir/hlo:materialize_broadcasts",  # buildcleaner: keep
-        "//tensorflow/compiler/mlir/hlo:unfuse_batch_norm",  # buildcleaner: keep
         "//tensorflow/compiler/xla/service:hlo_module_config",
         "//tensorflow/compiler/xla:debug_options_flags",
         "//tensorflow/compiler/xla:statusor",
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/materialize_broadcasts_pass.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/materialize_broadcasts_pass.cc
deleted file mode 100644
index e0c21f0b2e4..00000000000
--- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/materialize_broadcasts_pass.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "mlir/Dialect/StandardOps/IR/Ops.h"  // from @llvm-project
-#include "mlir/Transforms/DialectConversion.h"  // from @llvm-project
-#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h"
-#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h"
-#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h"
-
-namespace mlir {
-namespace kernel_gen {
-namespace transforms {
-namespace {
-
-#define GEN_PASS_CLASSES
-#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/kernel_gen_passes.h.inc"
-
-struct MaterializeBroadcastsPass
-    : public MaterializeBroadcastsPassBase<MaterializeBroadcastsPass> {
-  void runOnFunction() override {
-    mlir::ConversionTarget conversionTarget(getContext());
-    mlir::OwningRewritePatternList conversionPatterns;
-
-    // Consider the mhlo dialect legal for tests.
-    conversionTarget.addLegalDialect<mlir::mhlo::MhloDialect>();
-    // The conversion uses helpers from the Standard dialect.
-    conversionTarget.addLegalDialect<mlir::StandardOpsDialect>();
-
-    mlir::mhlo::SetupMaterializeBroadcastsLegality(&getContext(),
-                                                   &conversionTarget);
-    mlir::mhlo::PopulateMaterializeBroadcastsPatterns(&getContext(),
-                                                      &conversionPatterns);
-
-    if (failed(applyPartialConversion(getFunction(), conversionTarget,
-                                      std::move(conversionPatterns)))) {
-      return signalPassFailure();
-    }
-  }
-};
-
-}  // namespace
-
-std::unique_ptr<mlir::FunctionPass> CreateMaterializeBroadcastsPass() {
-  return std::make_unique<MaterializeBroadcastsPass>();
-}
-
-}  // namespace transforms
-}  // namespace kernel_gen
-}  // namespace mlir
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h
index f5169a16fac..98d831479f8 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h
@@ -62,9 +62,6 @@ std::unique_ptr<OperationPass<ModuleOp>> CreateHloBufferizePass();
 // buffers.
 std::unique_ptr<OperationPass<ModuleOp>> CreateFinalBufferizePass();
 
-// Pass to materialize broadcasts.
-std::unique_ptr<FunctionPass> CreateMaterializeBroadcastsPass();
-
 // Pass to convert scf::ParallelOp to scf::ForOp.
 std::unique_ptr<FunctionPass> CreateParallelLoopsToSequential();
 
@@ -74,9 +71,6 @@ std::unique_ptr<OperationPass<gpu::GPUModuleOp>> CreateGpuKernelToBlobPass(
     ArrayRef<std::string> architectures = {}, bool generate_fatbin = true,
     bool print_ptx = false);
 
-// Pass to unfuse batch norm.
-std::unique_ptr<FunctionPass> CreateUnfuseBatchNormPass();
-
 // Pass to propagate tensorflow runtime ABI knowledge across kernel boundaries.
 std::unique_ptr<FunctionPass> CreatePropagateTfAbiKnowledgeToKernels();
 
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td
index 2ec9bb3d3a6..abc1cb6ab06 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.td
@@ -61,16 +61,6 @@ def FinalBufferizePass : Pass<"final-bufferize", "ModuleOp"> {
   let constructor = "transforms::CreateFinalBufferizePass()";
 }
 
-def MaterializeBroadcastsPass : FunctionPass<"materialize-broadcast"> {
-  let summary = "Pass to materialize broadcasts";
-  let constructor = "transforms::CreateMaterializeBroadcastsPass()";
-}
-
-def UnfuseBatchNormPass : FunctionPass<"unfuse-batch-norm"> {
-  let summary = "Pass to unfuse batch norm";
-  let constructor = "transforms::CreateUnfuseBatchNormPass()";
-}
-
 def GpuKernelToBlobPass : Pass<"gpu-kernel-to-blob", "gpu::GPUModuleOp"> {
   let summary = "Pass to annotate GPU Module with its PTX";
   let options = [
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/unfuse_batch_norm_pass.cc b/tensorflow/compiler/mlir/tools/kernel_gen/transforms/unfuse_batch_norm_pass.cc
deleted file mode 100644
index 5c347f471b1..00000000000
--- a/tensorflow/compiler/mlir/tools/kernel_gen/transforms/unfuse_batch_norm_pass.cc
+++ /dev/null
@@ -1,45 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "mlir/Transforms/GreedyPatternRewriteDriver.h"  // from @llvm-project
-#include "tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/transforms/rewriters.h"
-#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/passes.h"
-
-namespace mlir {
-namespace kernel_gen {
-namespace transforms {
-namespace {
-
-#define GEN_PASS_CLASSES
-#include "tensorflow/compiler/mlir/tools/kernel_gen/transforms/kernel_gen_passes.h.inc"
-
-struct UnfuseBatchNormPass
-    : public UnfuseBatchNormPassBase<UnfuseBatchNormPass> {
-  void runOnFunction() override {
-    mlir::OwningRewritePatternList patterns;
-    mlir::mhlo::PopulateUnfuseBatchNormPatterns(&getContext(), &patterns);
-    mlir::applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
-  }
-};
-
-}  // namespace
-
-std::unique_ptr<mlir::FunctionPass> CreateUnfuseBatchNormPass() {
-  return std::make_unique<UnfuseBatchNormPass>();
-}
-
-}  // namespace transforms
-}  // namespace kernel_gen
-}  // namespace mlir
diff --git a/tensorflow/core/kernels/mlir_generated/BUILD b/tensorflow/core/kernels/mlir_generated/BUILD
index caca0208675..52d29b7bc93 100644
--- a/tensorflow/core/kernels/mlir_generated/BUILD
+++ b/tensorflow/core/kernels/mlir_generated/BUILD
@@ -261,7 +261,6 @@ tf_cuda_cc_test(
 
 gen_kernel_library(
     name = "abs",
-    generate_unranked = True,
     tile_size = "256",
     types = [
         "f16",
@@ -275,7 +274,6 @@ gen_kernel_library(
 
 gen_kernel_library(
     name = "conj",
-    generate_unranked = True,
     tile_size = "256",
     types = [
         "f32",
@@ -286,7 +284,6 @@ gen_kernel_library(
 
 gen_kernel_library(
     name = "imag",
-    generate_unranked = True,
     tile_size = "256",
     types = [
         "f32",
@@ -296,7 +293,6 @@ gen_kernel_library(
 
 gen_kernel_library(
     name = "invert",
-    generate_unranked = True,
     tile_size = "256",
     types = [
         "i8",
@@ -309,8 +305,6 @@ gen_kernel_library(
 
 gen_kernel_library(
     name = "is_inf",
-    generate_ranked = False,
-    generate_unranked = True,
     tile_size = "256",
     types = [
         "f16",
@@ -322,14 +316,12 @@ gen_kernel_library(
 
 gen_kernel_library(
     name = "logical_not",
-    generate_unranked = True,
     tile_size = "256",
     types = ["i1"],
 )
 
 gen_kernel_library(
     name = "real",
-    generate_unranked = True,
     tile_size = "256",
     types = [
         "f32",
@@ -339,7 +331,6 @@ gen_kernel_library(
 
 gen_kernel_library(
     name = "sign",
-    generate_unranked = True,
     tile_size = "256",
     types = [
         # TODO(b/162577610): Add bf16, c64 and c128.
@@ -354,8 +345,6 @@ gen_kernel_library(
 
 gen_kernel_library(
     name = "add_v2",
-    generate_ranked = False,
-    generate_unranked = True,
     tile_size = "256,1,1",
     types = [
         "f16",
@@ -371,8 +360,6 @@ gen_kernel_library(
 [
     gen_kernel_library(
         name = name,
-        generate_ranked = False,
-        generate_unranked = True,
         tile_size = "256,1,1",
         types = [
             "i8",
@@ -401,8 +388,6 @@ gen_kernel_library(
 [
     gen_kernel_library(
         name = name,
-        generate_ranked = False,
-        generate_unranked = True,
         tile_size = "256,1,1",
         types = [
             "i1",
@@ -419,8 +404,6 @@ gen_kernel_library(
 [
     gen_kernel_library(
         name = name,
-        generate_ranked = False,
-        generate_unranked = True,
         tile_size = "256,1,1",
         types = [
             "f16",
@@ -444,8 +427,6 @@ gen_kernel_library(
 [
     gen_kernel_library(
         name = name,
-        generate_ranked = False,
-        generate_unranked = True,
         tile_size = "256,1,1",
         types = [
             "f16",
@@ -470,8 +451,6 @@ gen_kernel_library(
 [
     gen_kernel_library(
         name = name,
-        generate_ranked = False,
-        generate_unranked = True,
         tile_size = "256,1,1",
         types = [
             "f16",
@@ -494,7 +473,6 @@ gen_kernel_library(
 [
     gen_kernel_library(
         name = name,
-        generate_unranked = True,
         tile_size = "256",
         types = [
             "f16",
@@ -516,7 +494,6 @@ gen_kernel_library(
 [
     gen_kernel_library(
         name = name,
-        generate_unranked = True,
         tile_size = "256",
         types = [
             "f16",
@@ -541,7 +518,6 @@ gen_kernel_library(
 [
     gen_kernel_library(
         name = name,
-        generate_unranked = True,
         tile_size = "256",
         types = [
             "f16",
diff --git a/tensorflow/core/kernels/mlir_generated/build_defs.bzl b/tensorflow/core/kernels/mlir_generated/build_defs.bzl
index 5db18a55642..2605a4c9670 100644
--- a/tensorflow/core/kernels/mlir_generated/build_defs.bzl
+++ b/tensorflow/core/kernels/mlir_generated/build_defs.bzl
@@ -31,168 +31,6 @@ GpuBinaryInfo = provider(
     fields = ["gpu_bins"],
 )
 
-def _gen_kernel_gpu_bin_impl(ctx):
-    name = ctx.attr.name
-    tile_sizes = ctx.attr.tile_size.replace("x", ",")
-    cmd_args = []
-    if ctx.attr.unroll_factors:
-        cmd_args.append("--unroll_factors=%s" % ctx.attr.unroll_factors)
-
-    if ctx.attr.extra_args:
-        cmd_args.extend(ctx.attr.extra_args)
-
-    gpu_bins = []
-    for arch in ctx.attr.gpu_archs:
-        # TODO(b/170283783): 'compute_' should generate both SASS and PTX.
-        arch = arch.replace("compute_", "sm_")
-        filename = "%s.%s.bin" % (name, arch)
-        gpu_bin = ctx.actions.declare_file(filename)
-        ctx.actions.run(
-            inputs = [ctx.file.mlir_op, ctx.file._tfso],
-            outputs = [gpu_bin],
-            executable = ctx.executable._tool,
-            arguments = cmd_args + [
-                "--tile_sizes=%s" % tile_sizes,
-                "--arch=%s" % arch,
-                "--input=%s" % ctx.file.mlir_op.path,
-                "--output=%s" % gpu_bin.path,
-            ],
-            mnemonic = "compile",
-        )
-        gpu_bins.append(gpu_bin)
-    return [GpuBinaryInfo(gpu_bins = gpu_bins)]
-
-_gen_kernel_gpu_bin_rule = rule(
-    attrs = {
-        "mlir_op": attr.label(mandatory = True, allow_single_file = True),
-        "tile_size": attr.string(mandatory = True),
-        "unroll_factors": attr.string(),
-        "gpu_archs": attr.string_list(mandatory = True),
-        "extra_args": attr.string_list(),
-        "_tfso": attr.label(
-            default = Label("//tensorflow:libtensorflow_framework.so.2"),
-            cfg = "host",
-            allow_single_file = True,
-        ),
-        "_tool": attr.label(
-            executable = True,
-            default = Label("//tensorflow/compiler/mlir/tools/kernel_gen:tf_to_gpu_binary"),
-            cfg = "host",
-        ),
-    },
-    output_to_genfiles = True,
-    implementation = _gen_kernel_gpu_bin_impl,
-)
-
-def _gen_kernel_image_hdr_impl_cuda(ctx):
-    images = []
-    for cubin in ctx.attr.input[GpuBinaryInfo].gpu_bins:
-        arch = cubin.path.split(".")[-2]
-        images.append("--image=profile=%s,file=%s" % (arch, cubin.path))
-
-    # Generate fatbin file from all cubins.
-    fatbin = ctx.actions.declare_file("%s.fatbin" % ctx.attr.name)
-    ctx.actions.run(
-        outputs = [fatbin],
-        inputs = ctx.attr.input[GpuBinaryInfo].gpu_bins,
-        executable = _lookup_file(ctx.attr._gpu_root, "bin/fatbinary"),
-        arguments = [
-            "--64",
-            "--cmdline=--compile-only",
-            "--link",
-            "--compress-all",
-            "--create=%s" % fatbin.path,
-        ] + images,
-        mnemonic = "fatbinary",
-    )
-
-    bin2c = _lookup_file(ctx.attr._gpu_root, "bin/bin2c")
-    ctx.actions.run_shell(
-        outputs = [ctx.outputs.out],
-        inputs = [fatbin],
-        tools = [bin2c],
-        command = "%s --static --const --type=char --name=%s %s 1> %s" %
-                  (bin2c.path, ctx.attr.symbol, fatbin.path, ctx.outputs.out.path),
-        mnemonic = "bin2c",
-    )
-
-def _gen_kernel_image_hdr_impl_rocm(ctx):
-    hsaco_files = []
-    hsaco_targets = []
-
-    # Add a dummy host target triple...clang-offload-bundler requires 1 and only 1 host target triple
-    hsaco_files.append("/dev/null")
-    hsaco_targets.append("host-x86_64-unknown-linux")
-
-    hsacos = ctx.attr.input[GpuBinaryInfo].gpu_bins
-    for hsaco in hsacos:
-        gfx_arch = hsaco.path.split(".")[-2]
-        hsaco_files.append(hsaco.path)
-        hsaco_targets.append("hip-amdgcn-amd-amdhsa-%s" % gfx_arch)
-
-    # Generate fatbin file from all hsacos.
-    fatbin = ctx.actions.declare_file("%s.fatbin" % ctx.attr.name)
-    ctx.actions.run(
-        outputs = [fatbin],
-        inputs = hsacos,
-        executable = _lookup_file(ctx.attr._gpu_root, "bin/clang-offload-bundler"),
-        arguments = [
-            "--inputs=%s" % ",".join(hsaco_files),
-            "--targets=%s" % ",".join(hsaco_targets),
-            "--type=o",
-            "--outputs=%s" % fatbin.path,
-        ],
-        mnemonic = "fatbinary",
-    )
-
-    ctx.actions.run_shell(
-        outputs = [ctx.outputs.out],
-        inputs = [fatbin],
-        command = (
-            ("hex=`hexdump -v -e \'/1 \"0x%%02x, \"\' %s` && " +
-             "len=`echo $hex | wc -c` && " +
-             "echo 'static const unsigned char %s['$len' + 1] = {' > %s && " +
-             "echo $hex | cat >> %s && " +
-             "echo '};' >> %s") % (
-                fatbin.path,
-                ctx.attr.symbol,
-                ctx.outputs.out.path,
-                ctx.outputs.out.path,
-                ctx.outputs.out.path,
-            )
-        ),
-    )
-
-_gen_kernel_image_hdr_rule = rule(
-    implementation = _gen_kernel_image_hdr_impl_rocm if rocm_is_configured() else _gen_kernel_image_hdr_impl_cuda,
-    output_to_genfiles = True,
-    attrs = {
-        "input": attr.label(mandatory = True, providers = [GpuBinaryInfo]),
-        "out": attr.output(mandatory = True),
-        "symbol": attr.string(mandatory = True),
-        "_gpu_root": attr.label(
-            default = Label("@local_config_rocm//rocm:rocm_root") if rocm_is_configured() else Label("@local_config_cuda//cuda:cuda_root"),
-        ),
-    },
-)
-
-def _gen_kernel_image_hdr(name, mlir_op, gpu_archs, tile_size, unroll_factors = None, extra_args = []):
-    """Generates a C header with fatbin data from a Tensorflow op."""
-    _gen_kernel_gpu_bin_rule(
-        name = name + "_cubin",
-        mlir_op = mlir_op,
-        tile_size = tile_size,
-        unroll_factors = unroll_factors,
-        gpu_archs = gpu_archs,
-        extra_args = extra_args,
-    )
-    _gen_kernel_image_hdr_rule(
-        name = name,
-        input = ":" + name + "_cubin",
-        out = "%s.h" % name,
-        symbol = "k%s" % name.replace("_", " ").title().replace(" ", ""),
-    )
-
 type_to_mlir = {
     "c64": "complex<f32>",
     "c128": "complex<f64>",
@@ -204,18 +42,12 @@ def _gen_mlir_op_impl(ctx):
     if mlir_type in type_to_mlir:
         mlir_type = type_to_mlir[mlir_type]
 
-    # In order to generate a ranked kernel we change *xelem_type to ?xelem_type
-    # and remove element type from the entry function name.
-    convert_to_ranked = ""
-    if ctx.attr.unranked == False:
-        convert_to_ranked = "sed s/*x/?x/g | sed s/_elem_type//g |"
     cmd = ctx.actions.run_shell(
         inputs = [ctx.file.template],
         outputs = [ctx.outputs.out],
         command = (
-            ("cat %s | %s sed 's/_elem_type/_%s/g' | sed 's/elem_type/%s/g' > %s") % (
+            ("cat %s | sed 's/_elem_type/_%s/g' | sed 's/elem_type/%s/g' > %s") % (
                 ctx.file.template.path,
-                convert_to_ranked,
                 ctx.attr.type,
                 mlir_type,
                 ctx.outputs.out.path,
@@ -244,40 +76,6 @@ def _gen_mlir_op(name, type, unranked):
         unranked = unranked,
     )
 
-def gen_ranked_kernel_library(name, types, tile_size, tags = [], unroll_factors = None, extra_args = []):
-    """ Generate a library with kernels for a specific tensorflow op.
-
-    Args:
-      name: The name of the tensorflow op.
-      types: The types ("f16", "f32", "f64") for which a kernel should be generated.
-      tile_size: The tiling specification, e.g. "16x16".
-      unroll_factors: The unrolling specification, e.g. "4,4"
-      tags: The tags which should be added to the library.
-      extra_args: Extra arguments to pass to the generator tool.
-    """
-
-    if cuda_gpu_architectures() or rocm_gpu_architectures():
-        for type in types:
-            _gen_mlir_op(
-                name = name,
-                type = type,
-                unranked = False,
-            )
-            _gen_kernel_image_hdr(
-                name = "{name}_{type}_kernel".format(name = name, type = type),
-                mlir_op = "{name}_{type}.mlir".format(name = name, type = type),
-                gpu_archs = rocm_gpu_architectures() if rocm_is_configured() else cuda_gpu_architectures(),
-                tile_size = tile_size,
-                unroll_factors = unroll_factors,
-                extra_args = extra_args,
-            )
-
-    native.cc_library(
-        name = name + "_kernels",
-        hdrs = if_gpu_is_configured([":{name}_{type}_kernel".format(name = name, type = type) for type in types]),
-        tags = tags,
-    )
-
 ################################################################################
 # Unranked kernels build rules.
 ################################################################################
@@ -410,22 +208,12 @@ def gen_unranked_kernel_library(name, types, tile_size, tags = [], unroll_factor
         tags = tags,
     )
 
-def gen_kernel_library(name, types, tile_size, tags = [], unroll_factors = None, extra_args = [], generate_ranked = True, generate_unranked = False):
-    if (generate_ranked):
-        gen_ranked_kernel_library(
-            name = name,
-            types = types,
-            tile_size = tile_size,
-            tags = tags,
-            unroll_factors = unroll_factors,
-            extra_args = extra_args,
-        )
-    if (generate_unranked):
-        gen_unranked_kernel_library(
-            name = name + "_unranked",
-            types = types,
-            tile_size = tile_size,
-            tags = tags,
-            unroll_factors = unroll_factors,
-            extra_args = extra_args,
-        )
+def gen_kernel_library(name, types, tile_size, tags = [], unroll_factors = None, extra_args = []):
+    gen_unranked_kernel_library(
+        name = name + "_unranked",
+        types = types,
+        tile_size = tile_size,
+        tags = tags,
+        unroll_factors = unroll_factors,
+        extra_args = extra_args,
+    )
diff --git a/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_abs.cc b/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_abs.cc
deleted file mode 100644
index 948a7c00437..00000000000
--- a/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_abs.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include <string>
-#include <vector>
-
-#include "absl/types/span.h"
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/tensor_types.h"
-#include "tensorflow/core/kernels/mlir_generated/abs_f16_kernel.h"
-#include "tensorflow/core/kernels/mlir_generated/abs_f32_kernel.h"
-#include "tensorflow/core/kernels/mlir_generated/abs_f64_kernel.h"
-#include "tensorflow/core/kernels/mlir_generated/abs_i32_kernel.h"
-#include "tensorflow/core/kernels/mlir_generated/abs_i64_kernel.h"
-#include "tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.h"
-
-namespace tensorflow {
-namespace {
-GENERATE_OP_KERNEL_BASE(Abs);
-}  // namespace
-
-GENERATE_AND_REGISTER_UNARY_KERNEL(Abs, F16, Eigen::half);
-GENERATE_AND_REGISTER_UNARY_KERNEL(Abs, F32, float);
-GENERATE_AND_REGISTER_UNARY_KERNEL(Abs, F64, double);
-GENERATE_AND_REGISTER_UNARY_KERNEL(Abs, I32, int32);
-GENERATE_AND_REGISTER_UNARY_KERNEL(Abs, I64, int64);
-}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.cc b/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.cc
deleted file mode 100644
index c5fbb155923..00000000000
--- a/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.cc
+++ /dev/null
@@ -1,129 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.h"
-
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "absl/strings/string_view.h"
-#include "absl/synchronization/mutex.h"
-#include "absl/types/span.h"
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/tensor_types.h"
-#include "tensorflow/core/lib/core/errors.h"
-#include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/platform/logging.h"
-#include "tensorflow/core/platform/stream_executor.h"
-
-namespace tensorflow {
-namespace {
-Status CreateKernel(absl::string_view kernel_name, uint64_t num_args,
-                    absl::string_view ptx, absl::Span<const uint8_t> cubin_data,
-                    se::StreamExecutor* stream_exec,
-                    std::unique_ptr<se::KernelBase>& kernel_base) {
-  se::MultiKernelLoaderSpec loader_spec(num_args);
-
-  if (!cubin_data.empty()) {
-    loader_spec.AddCudaCubinInMemory(
-        reinterpret_cast<const char*>(cubin_data.data()), kernel_name);
-  }
-
-  kernel_base.reset(new se::KernelBase(stream_exec));
-  return stream_exec->GetKernel(loader_spec, kernel_base.get());
-}
-
-struct LaunchConfig {
-  se::BlockDim blockDim;
-  se::ThreadDim threadDim;
-};
-
-LaunchConfig GetLaunchConfiguration(std::vector<uint64> tile_sizes,
-                                    std::vector<uint64> unrolling_factors,
-                                    std::vector<uint64> shape) {
-  LaunchConfig result;
-  // Ensure the vectors are length 3 and pad with ones.
-  tile_sizes.resize(3, 1);
-  unrolling_factors.resize(3, 1);
-  shape.resize(3, 1);
-  // The number of threads is given by the tiling size.
-  result.threadDim = se::ThreadDim(tile_sizes[0], tile_sizes[1], tile_sizes[2]);
-  // We know that the kernel was generated by mapping the three outer-most
-  // dimensions to x,y,z dimensions. So we only need to compute those.
-  std::vector<int> block_dims(3);
-  for (int i = 0; i < 3; ++i) {
-    // Compute the number of grids. We use ceildiv here as we have to allocate
-    // an extra thread/block if the division is not even. The kernel contains
-    // code to handle the boundaries.
-    uint64 number_of_threads = Eigen::divup(shape[i], unrolling_factors[i]);
-    int number_of_grids = Eigen::divup(number_of_threads, tile_sizes[i]);
-    block_dims[i] = number_of_grids;
-  }
-  result.blockDim = se::BlockDim(block_dims[0], block_dims[1], block_dims[2]);
-  return result;
-}
-}  // namespace
-
-void MlirGeneratedUnaryOp::Compute(OpKernelContext* ctx) {
-  auto* stream = ctx->op_device_context()->stream();
-  se::KernelBase* kernel;
-  {
-    absl::MutexLock l(&mu_);
-    if (!kernel_) {
-      OP_REQUIRES_OK(ctx, CreateKernel(name_, 10, "", cubin_data_,
-                                       stream->parent(), kernel_));
-    }
-    kernel = kernel_.get();
-  }
-
-  const Tensor& inp = ctx->input(0);
-  Tensor* out = nullptr;
-  OP_REQUIRES_OK(
-      ctx, ctx->forward_input_or_allocate_output({0}, 0, inp.shape(), &out));
-
-  if (inp.NumElements() == 0) {
-    return;
-  }
-
-  se::KernelArgsArray<10> args;
-
-  args.add_device_memory_argument(
-      stream_executor::DeviceMemoryBase(inp.data(), inp.TotalBytes()));
-  args.add_device_memory_argument(
-      stream_executor::DeviceMemoryBase(inp.data(), inp.TotalBytes()));
-  args.add_argument<int64_t>(0);
-  args.add_argument<int64_t>(inp.NumElements());
-  args.add_argument<int64_t>(1);
-
-  args.add_device_memory_argument(
-      stream_executor::DeviceMemoryBase(out->data(), out->TotalBytes()));
-  args.add_device_memory_argument(
-      stream_executor::DeviceMemoryBase(out->data(), out->TotalBytes()));
-  args.add_argument<int64_t>(0);
-  args.add_argument<int64_t>(inp.NumElements());
-  args.add_argument<int64_t>(1);
-
-  // This has to be aligned with the configuration that was used when building
-  // the kernels. See the corresponding build rules in the `BUILD` file.
-  LaunchConfig config = GetLaunchConfiguration(
-      {256}, {4}, {static_cast<uint64>(inp.NumElements())});
-  OP_REQUIRES_OK(ctx, stream->parent()->Launch(stream, config.threadDim,
-                                               config.blockDim, *kernel, args));
-}
-
-}  // namespace tensorflow
diff --git a/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.h b/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.h
deleted file mode 100644
index 466bbead3a5..00000000000
--- a/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_CORE_KERNELS_MLIR_GENERATED_CWISE_OP_GPU_BASE_H_
-#define TENSORFLOW_CORE_KERNELS_MLIR_GENERATED_CWISE_OP_GPU_BASE_H_
-
-#include <memory>
-#include <string>
-
-#include "absl/strings/ascii.h"
-#include "absl/synchronization/mutex.h"
-#include "absl/types/span.h"
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/tensor_types.h"
-#include "tensorflow/core/platform/stream_executor.h"
-
-namespace tensorflow {
-class MlirGeneratedUnaryOp : public OpKernel {
- public:
-  MlirGeneratedUnaryOp(OpKernelConstruction* ctx, std::string name,
-                       absl::Span<const uint8_t> cubin_data)
-      : OpKernel(ctx), name_(name), cubin_data_(cubin_data) {}
-
-  void Compute(OpKernelContext* ctx) override;
-
- private:
-  std::string name_;
-  absl::Span<const uint8_t> cubin_data_;
-  std::unique_ptr<se::KernelBase> kernel_;
-  absl::Mutex mu_;
-};
-
-#define GENERATE_OP_KERNEL_BASE(kernel_name)                               \
-  class MlirGenerated##kernel_name##Op : public MlirGeneratedUnaryOp {     \
-   public:                                                                 \
-    MlirGenerated##kernel_name##Op(OpKernelConstruction* ctx,              \
-                                   absl::Span<const uint8_t> cubin_data)   \
-        : MlirGeneratedUnaryOp(ctx, #kernel_name "_kernel", cubin_data) {} \
-  };
-
-#define GENERATE_OP_KERNEL_FOR(kernel_name, data_type)    \
-  class MlirGenerated##kernel_name##data_type##Op         \
-      : public MlirGenerated##kernel_name##Op {           \
-   public:                                                \
-    explicit MlirGenerated##kernel_name##data_type##Op(   \
-        OpKernelConstruction* ctx)                        \
-        : MlirGenerated##kernel_name                      \
-          ##Op(ctx, k##kernel_name##data_type##Kernel) {} \
-  };
-
-#define GENERATE_AND_REGISTER_UNARY_KERNEL(kernel_name, data_type,    \
-                                           native_data_type)          \
-  namespace {                                                         \
-  GENERATE_OP_KERNEL_FOR(kernel_name, data_type)                      \
-  }                                                                   \
-  REGISTER_KERNEL_BUILDER(Name(#kernel_name)                          \
-                              .Device(DEVICE_GPU)                     \
-                              .TypeConstraint<native_data_type>("T"), \
-                          MlirGenerated##kernel_name##data_type##Op);
-
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_CORE_KERNELS_MLIR_GENERATED_CWISE_OP_GPU_BASE_H_
diff --git a/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_tanh.cc b/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_tanh.cc
deleted file mode 100644
index a9cc0666b0b..00000000000
--- a/tensorflow/core/kernels/mlir_generated/cwise_op_gpu_tanh.cc
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include <string>
-#include <vector>
-
-#include "absl/types/span.h"
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/tensor_types.h"
-#include "tensorflow/core/kernels/mlir_generated/cwise_op_gpu_base.h"
-#include "tensorflow/core/kernels/mlir_generated/tanh_f16_kernel.h"
-#include "tensorflow/core/kernels/mlir_generated/tanh_f32_kernel.h"
-#include "tensorflow/core/kernels/mlir_generated/tanh_f64_kernel.h"
-
-namespace tensorflow {
-namespace {
-GENERATE_OP_KERNEL_BASE(Tanh);
-}  // namespace
-
-GENERATE_AND_REGISTER_UNARY_KERNEL(Tanh, F16, Eigen::half)
-GENERATE_AND_REGISTER_UNARY_KERNEL(Tanh, F32, float)
-GENERATE_AND_REGISTER_UNARY_KERNEL(Tanh, F64, double)
-}  // namespace tensorflow

From 4b05947bf188786ff7f09b46fbc286d6a885e36b Mon Sep 17 00:00:00 2001
From: Allen Lavoie <allenl@google.com>
Date: Mon, 14 Dec 2020 08:45:36 -0800
Subject: [PATCH 39/60] tape.batch_jacobian: don't make zeros with the wrong
 dtype if gradients are disconnected

Fixes #43043.

PiperOrigin-RevId: 347393623
Change-Id: Ie2b3adea92a817f96b7d4fd23b001328c42a87e1
---
 tensorflow/python/eager/backprop.py      | 10 +++++++---
 tensorflow/python/eager/backprop_test.py | 22 ++++++++++++++++++++++
 2 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py
index be121bf135e..527b106d01e 100644
--- a/tensorflow/python/eager/backprop.py
+++ b/tensorflow/python/eager/backprop.py
@@ -1344,9 +1344,13 @@ class GradientTape(object):
                                  parallel_iterations=parallel_iterations)
     new_shape = array_ops.concat([target_shape, source_shape[1:]], axis=0)
     if output is None:
-      output = array_ops.zeros(new_shape)
-      if rewrap_as_ndarray:
-        output = np_arrays.tensor_to_ndarray(output)
+      if not experimental_use_pfor and target_row_size == 0:
+        # Since we can't actually run the loop function in this case, we don't
+        # know whether gradients are unconnected or not. We'll return a numeric
+        # tensor (with zero elements).
+        output = array_ops.zeros(new_shape, target.dtype)
+        if rewrap_as_ndarray:
+          output = np_arrays.tensor_to_ndarray(output)
       return output
     else:
       output = array_ops.reshape(output,
diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py
index 0063b7f155e..417f8c132e7 100644
--- a/tensorflow/python/eager/backprop_test.py
+++ b/tensorflow/python/eager/backprop_test.py
@@ -1961,6 +1961,28 @@ class BatchJacobianTest(test.TestCase, parameterized.TestCase):
       f = def_function.function(f)
     self.assertAllEqual([1, 0, 0], array_ops.shape(f(array_ops.zeros([1, 0]))))
 
+  @parameterized.parameters((True,), (False))
+  def test_respects_disconnected_gradients(self, use_pfor):
+    @def_function.function
+    def f(x):
+      del x
+      return constant_op.constant([[1.]], dtype=dtypes.float64)
+
+    with backprop.GradientTape(persistent=True) as tape:
+      x = constant_op.constant([[2.]], dtype=dtypes.float64)
+      tape.watch(x)
+      y = f(x)
+    self.assertIsNone(tape.batch_jacobian(y, x, experimental_use_pfor=use_pfor))
+
+    with backprop.GradientTape(persistent=True) as tape:
+      x = constant_op.constant([[2.]], dtype=dtypes.float64)
+      tape.watch(x)
+      y = f(x)
+    jac = tape.batch_jacobian(y, x, unconnected_gradients='zero',
+                              experimental_use_pfor=use_pfor)
+    self.assertEqual(dtypes.float64, jac.dtype)
+    self.assertAllClose([[[0.]]], jac)
+
 
 class AggregateIndexedSlicesGradientsTest(test_util.TensorFlowTestCase):
 

From d028c80a0886b353e7e3d85aa18f14b711eca772 Mon Sep 17 00:00:00 2001
From: Jean-Baptiste Lespiau <jblespiau@google.com>
Date: Mon, 14 Dec 2020 08:54:24 -0800
Subject: [PATCH 40/60] Move the logic to push an argument on device into a
 `DevicePut` function.

It also adds `PjRtBufferFromPyval` to skip the PyBuffer layer.

PiperOrigin-RevId: 347395005
Change-Id: I41f8b5032b265177c6dcf352cfb253b671d7ef70
---
 tensorflow/compiler/xla/python/jax_jit.cc   | 426 ++++++++++----------
 tensorflow/compiler/xla/python/jax_jit.h    |  35 +-
 tensorflow/compiler/xla/python/py_client.cc |  11 +-
 tensorflow/compiler/xla/python/py_client.h  |   3 +
 4 files changed, 259 insertions(+), 216 deletions(-)

diff --git a/tensorflow/compiler/xla/python/jax_jit.cc b/tensorflow/compiler/xla/python/jax_jit.cc
index 0c624928d86..72bbb57cb0d 100644
--- a/tensorflow/compiler/xla/python/jax_jit.cc
+++ b/tensorflow/compiler/xla/python/jax_jit.cc
@@ -57,6 +57,7 @@ namespace py = pybind11;
 
 // TODO(phawkins): Add support for Tracers.
 // TODO(jblespiau): Use absl Status.
+// TODO(jblespiau): Remove the "xla::" prefixes when not needed.
 
 std::string ArgSignature::DebugString() const {
   std::string result = "";
@@ -175,6 +176,207 @@ H AbslHashValue(H h, const CallSignature& s) {
   return h;
 }
 
+namespace {
+const py::dtype* DtypeTo32BitDtype(const py::dtype& dtype) {
+  static const auto* int64_dt = new py::dtype("int64");
+  static const auto* int32_dt = new py::dtype("int32");
+  static const auto* uint64_dt = new py::dtype("uint64");
+  static const auto* uint32_dt = new py::dtype("uint32");
+  static const auto* float64_dt = new py::dtype("float64");
+  static const auto* float32_dt = new py::dtype("float32");
+  static const auto* complex64_dt = new py::dtype("complex64");
+  static const auto* complex128_dt = new py::dtype("complex128");
+
+  if (dtype.equal(*int64_dt)) {
+    return int32_dt;
+  }
+  if (dtype.equal(*float64_dt)) {
+    return float32_dt;
+  }
+  if (dtype.equal(*uint64_dt)) {
+    return uint32_dt;
+  }
+  if (dtype.equal(*complex128_dt)) {
+    return complex64_dt;
+  }
+
+  return nullptr;
+}
+
+// The equivalent of the Python jax/lazy.py::is_trivial:
+// return (type(lexpr.input) is ArrayVar and
+//         lexpr.dims == tuple(range(len(lexpr.shape))))
+//
+// Expects *only* `None` or a `LazyExpr` object.
+bool IsTrivialLazyExpr(py::handle lexpr) {
+  if (lexpr.is_none()) {
+    return true;
+  }
+
+  static const auto* lazy_module =
+      new py::module(py::module::import("jax.lazy"));
+  auto input = py::getattr(lexpr, "input");
+  if (!input.get_type().is(lazy_module->attr("ArrayVar"))) {
+    return false;
+  }
+  py::tuple dims = py::cast<py::tuple>(lexpr.attr("dims"));
+  py::tuple shape = py::cast<py::tuple>(lexpr.attr("shape"));
+
+  for (int i = 0; i < shape.size(); ++i) {
+    if (dims[i].is_none()) {
+      return false;
+    }
+    if (py::cast<int>(dims[i]) != i) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool IsFloat0(py::array arg) {
+  static const auto* dtypes_module =
+      new py::module(py::module::import("jax.dtypes"));
+  static const auto* float0_dtype =
+      new py::handle(dtypes_module->attr("float0"));
+  return float0_dtype->is(arg.attr("dtype"));
+}
+
+template <typename CppType, typename Pybind11Type>
+std::unique_ptr<xla::PjRtBuffer> ConvertToScalarBuffer(
+    const py::handle& scalar, xla::PjRtClient* client,
+    xla::PjRtDevice* device) {
+  CppType data = py::cast<Pybind11Type>(scalar);
+  xla::Shape shape = xla::ShapeUtil::MakeShapeWithType<CppType>({});
+  return ValueOrThrow(client->BufferFromHostBuffer(
+      &data, shape,
+      xla::PjRtClient::HostBufferSemantics::kImmutableOnlyDuringCall, nullptr,
+      device));
+}
+
+// Converts a scalar to the associated PjRtBuffer or raises an error if it is
+// not convertible (thus, this must be called after other checks).
+StatusOr<std::unique_ptr<xla::PjRtBuffer>> ScalarToBuffer(
+    py::handle scalar, bool jax_enable_x64, xla::PjRtClient* client,
+    xla::PjRtDevice* device) {
+  // Important: In Python, isinstance(True, int) returns True. Thus, we have
+  // to check for bool before int.
+  if (py::isinstance<py::bool_>(scalar)) {
+    return ConvertToScalarBuffer<bool, py::bool_>(scalar, client, device);
+  } else if (py::isinstance<py::int_>(scalar)) {
+    if (jax_enable_x64) {
+      return ConvertToScalarBuffer<int64, py::int_>(scalar, client, device);
+    } else {
+      return ConvertToScalarBuffer<int, py::int_>(scalar, client, device);
+    }
+  } else if (py::isinstance<py::float_>(scalar)) {
+    if (jax_enable_x64) {
+      return ConvertToScalarBuffer<double, py::float_>(scalar, client, device);
+
+    } else {
+      return ConvertToScalarBuffer<float, py::float_>(scalar, client, device);
+    }
+  } else if (PyComplex_Check(scalar.ptr())) {
+    Py_complex result = PyComplex_AsCComplex(scalar.ptr());
+    if (result.real == -1.0 && PyErr_Occurred()) {
+      PyErr_Clear();
+      throw std::runtime_error("Could not convert the complex number");
+    }
+    if (jax_enable_x64) {
+      xla::complex128 data(result.real, result.imag);
+      xla::Shape shape = xla::ShapeUtil::MakeShapeWithType<xla::complex128>({});
+      return ValueOrThrow(client->BufferFromHostBuffer(
+          &data, shape,
+          xla::PjRtClient::HostBufferSemantics::kImmutableOnlyDuringCall,
+          nullptr, device));
+    } else {
+      xla::complex64 data(result.real, result.imag);
+      xla::Shape shape = xla::ShapeUtil::MakeShapeWithType<xla::complex64>({});
+      return ValueOrThrow(client->BufferFromHostBuffer(
+          &data, shape,
+          xla::PjRtClient::HostBufferSemantics::kImmutableOnlyDuringCall,
+          nullptr, device));
+    }
+  }
+  return InvalidArgument(
+      "%s", absl::StrCat(
+                "Not supported: The C++ jax jit execution path, only accepts "
+                "DeviceArray, Numpy arrays, or Python scalars. Got type ",
+                py::cast<std::string>(py::str(scalar.get_type()))));
+}
+
+}  // namespace
+
+StatusOr<DevicePutResult> DevicePut(pybind11::handle obj, PjRtDevice* to_device,
+                                    bool jax_enable_x64,
+                                    xla::PyClient& pyclient) {
+  static const auto* xla_module =
+      new py::module(py::module::import("jax.interpreters.xla"));
+  const auto& device_array = xla_module->attr("_DeviceArray");
+
+  static const auto* numpy_module = new py::module(py::module::import("numpy"));
+  const auto& np_array = numpy_module->attr("array");
+
+  bool is_py_buffer = py::isinstance<PyBuffer>(obj);
+  if (is_py_buffer) {
+    // PyBuffer necessarily has a trivial LazyExpr, no need to check it.
+    PyBuffer* buffer = py::cast<xla::PyBuffer*>(obj);
+    bool weak_type = py::cast<py::bool_>(obj.attr("aval").attr("weak_type"));
+    if (buffer->device().contents == to_device) {
+      return DevicePutResult(buffer->buffer(), weak_type);
+    } else {
+      // Performs a device-to-device copy if the devices are on the same
+      // platform.
+      // Buffers from different XLA backends are passed through the host.
+      std::unique_ptr<PjRtBuffer> copied_buffer =
+          ValueOrThrow(buffer->buffer()->CopyToDevice(to_device));
+      return DevicePutResult(std::move(copied_buffer), weak_type);
+    }
+
+  } else if (obj.get_type().is(device_array)) {
+    if (!IsTrivialLazyExpr(py::getattr(obj, "_lazy_expr"))) {
+      return InvalidArgument(
+          "Non-trivial lazy expression not supported in C++. "
+          "Falling back to Python.");
+    }
+    PyBuffer* buffer = py::cast<xla::PyBuffer*>(obj.attr("device_buffer"));
+    bool weak_type = py::cast<py::bool_>(obj.attr("aval").attr("weak_type"));
+    // Same block as in the previous `if (is_py_buffer)`.
+    if (buffer->device().contents == to_device) {
+      return DevicePutResult(buffer->buffer(), weak_type);
+    } else {
+      std::unique_ptr<PjRtBuffer> copied_buffer =
+          ValueOrThrow(buffer->buffer()->CopyToDevice(to_device));
+      return DevicePutResult(std::move(copied_buffer), weak_type);
+    }
+  } else if (py::isinstance<py::array>(obj)) {
+    py::array numpy_array = py::cast<py::array>(obj);
+    if (IsFloat0(numpy_array)) {
+      return InvalidArgument(
+          "float0 numpy arrays not supported in C++. "
+          "Falling back to Python.");
+    }
+    // If jax_enable_x64 is not set, we need to coerce 64-bit types to 32 bits.
+    // Note that this is calling back to Python!
+    if (!jax_enable_x64) {
+      const py::dtype* to_dtype = DtypeTo32BitDtype(numpy_array.dtype());
+      if (to_dtype) {
+        numpy_array = np_array(numpy_array, *to_dtype);
+      }
+    }
+    std::unique_ptr<xla::PjRtBuffer> buffer =
+        ValueOrThrow(pyclient.PjRtBufferFromPyval(
+            numpy_array, to_device,
+            /*force_copy=*/false, /*host_buffer_semantics=*/
+            xla::PjRtClient::HostBufferSemantics::kZeroCopy));
+    return DevicePutResult(std::move(buffer), /*weak_type=*/false);
+  } else {
+    TF_ASSIGN_OR_RETURN(
+        std::unique_ptr<xla::PjRtBuffer> buffer,
+        ScalarToBuffer(obj, jax_enable_x64, to_device->client(), to_device));
+    return DevicePutResult(std::move(buffer), /*weak_type=*/true);
+  }
+}
+
 namespace {
 
 struct CacheEntry {
@@ -301,36 +503,6 @@ CompiledFunction::~CompiledFunction() {
 
 namespace {
 
-// The equivalent of the Python jax/lazy.py::is_trivial:
-// return (type(lexpr.input) is ArrayVar and
-//         lexpr.dims == tuple(range(len(lexpr.shape))))
-//
-// Expects *only* instances of `DeviceArray`.
-bool IsTrivialLazyExpr(py::handle lexpr) {
-  if (lexpr.is_none()) {
-    return true;
-  }
-
-  static const auto* lazy_module =
-      new py::module(py::module::import("jax.lazy"));
-  auto input = py::getattr(lexpr, "input");
-  if (!input.get_type().is(lazy_module->attr("ArrayVar"))) {
-    return false;
-  }
-  py::tuple dims = py::cast<py::tuple>(lexpr.attr("dims"));
-  py::tuple shape = py::cast<py::tuple>(lexpr.attr("shape"));
-
-  for (int i = 0; i < shape.size(); ++i) {
-    if (dims[i].is_none()) {
-      return false;
-    }
-    if (py::cast<int>(dims[i]) != i) {
-      return false;
-    }
-  }
-  return true;
-}
-
 // The resulting information of the parsing and conversion of the arguments.
 struct ParsedArgumentsAsBuffers {
   // The call signature will be filled during 2 steps:
@@ -347,13 +519,11 @@ struct ParsedArgumentsAsBuffers {
 
   // The following is only valid if the parsing succeeds.
   std::vector<xla::PjRtBuffer*> arg_buffers;
-  // We may need to keep some objects around, because:
+  // We may need to keep these objects around, because:
   // (a) we need to extend the lifetime of objects created within
   //    `ConvertArgsToBuffers`
   // (b) `arg_buffers` do not maintain ownership
-  std::vector<absl::variant<std::unique_ptr<xla::PyBuffer>,
-                            std::unique_ptr<xla::PjRtBuffer>>>
-      keep_alive;
+  std::vector<std::unique_ptr<xla::PjRtBuffer>> keep_alive;
 };
 
 // Filter out static arguments, flatten and concatenate other arguments (i.e.
@@ -412,103 +582,6 @@ void FlattenArguments(const py::args& args, const py::kwargs& py_kwargs,
   }
 }
 
-template <typename CppType, typename Pybind11Type>
-std::unique_ptr<xla::PjRtBuffer> ConvertToScalarBuffer(
-    const py::handle& scalar, xla::PjRtClient* client,
-    xla::PjRtDevice* device) {
-  CppType data = py::cast<Pybind11Type>(scalar);
-  xla::Shape shape = xla::ShapeUtil::MakeShapeWithType<CppType>({});
-  return ValueOrThrow(client->BufferFromHostBuffer(
-      &data, shape,
-      xla::PjRtClient::HostBufferSemantics::kImmutableOnlyDuringCall, nullptr,
-      device));
-}
-
-// Convert a scalar to the associated PjRtBuffer or raises an error if it is
-// not convertible (thus, this must be called after other checks).
-StatusOr<std::unique_ptr<xla::PjRtBuffer>> ScalarToBuffer(
-    py::handle scalar, bool jax_enable_x64, xla::PjRtClient* client,
-    xla::PjRtDevice* device) {
-  // Important: In Python, isinstance(True, int) returns True. Thus, we have
-  // to check for bool before int.
-  if (py::isinstance<py::bool_>(scalar)) {
-    return ConvertToScalarBuffer<bool, py::bool_>(scalar, client, device);
-  } else if (py::isinstance<py::int_>(scalar)) {
-    if (jax_enable_x64) {
-      return ConvertToScalarBuffer<int64, py::int_>(scalar, client, device);
-    } else {
-      return ConvertToScalarBuffer<int, py::int_>(scalar, client, device);
-    }
-  } else if (py::isinstance<py::float_>(scalar)) {
-    if (jax_enable_x64) {
-      return ConvertToScalarBuffer<double, py::float_>(scalar, client, device);
-
-    } else {
-      return ConvertToScalarBuffer<float, py::float_>(scalar, client, device);
-    }
-  } else if (PyComplex_Check(scalar.ptr())) {
-    Py_complex result = PyComplex_AsCComplex(scalar.ptr());
-    if (result.real == -1.0 && PyErr_Occurred()) {
-      PyErr_Clear();
-      throw std::runtime_error("Could not convert the complex number");
-    }
-    if (jax_enable_x64) {
-      xla::complex128 data(result.real, result.imag);
-      xla::Shape shape = xla::ShapeUtil::MakeShapeWithType<xla::complex128>({});
-      return ValueOrThrow(client->BufferFromHostBuffer(
-          &data, shape,
-          xla::PjRtClient::HostBufferSemantics::kImmutableOnlyDuringCall,
-          nullptr, device));
-    } else {
-      xla::complex64 data(result.real, result.imag);
-      xla::Shape shape = xla::ShapeUtil::MakeShapeWithType<xla::complex64>({});
-      return ValueOrThrow(client->BufferFromHostBuffer(
-          &data, shape,
-          xla::PjRtClient::HostBufferSemantics::kImmutableOnlyDuringCall,
-          nullptr, device));
-    }
-  }
-  return InvalidArgument(
-      "%s", absl::StrCat(
-                "Not supported: The C++ jax jit execution path, only accepts "
-                "DeviceArray, Numpy arrays, or Python scalars. Got type ",
-                py::cast<std::string>(py::str(scalar.get_type()))));
-}
-
-const py::dtype* DtypeTo32BitDtype(const py::dtype& dtype) {
-  static const auto* int64_dt = new py::dtype("int64");
-  static const auto* int32_dt = new py::dtype("int32");
-  static const auto* uint64_dt = new py::dtype("uint64");
-  static const auto* uint32_dt = new py::dtype("uint32");
-  static const auto* float64_dt = new py::dtype("float64");
-  static const auto* float32_dt = new py::dtype("float32");
-  static const auto* complex64_dt = new py::dtype("complex64");
-  static const auto* complex128_dt = new py::dtype("complex128");
-
-  if (dtype.equal(*int64_dt)) {
-    return int32_dt;
-  }
-  if (dtype.equal(*float64_dt)) {
-    return float32_dt;
-  }
-  if (dtype.equal(*uint64_dt)) {
-    return uint32_dt;
-  }
-  if (dtype.equal(*complex128_dt)) {
-    return complex64_dt;
-  }
-
-  return nullptr;
-}
-
-bool IsFloat0(py::array arg) {
-  static const auto* dtypes_module =
-      new py::module(py::module::import("jax.dtypes"));
-  static const auto* float0_dtype =
-      new py::handle(dtypes_module->attr("float0"));
-  return float0_dtype->is(arg.attr("dtype"));
-}
-
 // Converts flattened arguments contained in ParsedArgumentsAsBuffers in
 // place. If arguments are `DeviceArray`, they must all be on the same `Device`.
 //
@@ -528,9 +601,6 @@ Status ConvertArgsToBuffers(bool jax_enable_x64, xla::PyClient& pyclient,
       new py::module(py::module::import("jax.interpreters.xla"));
   const auto& device_array = xla_module->attr("_DeviceArray");
 
-  static const auto* numpy_module = new py::module(py::module::import("numpy"));
-  const auto& np_array = numpy_module->attr("array");
-
   // When the jitted function is not committed, we first check whether any
   // sticky `DeviceArray` is present and on which device they live. See also:
   // https://github.com/google/jax/pull/1884
@@ -581,88 +651,20 @@ Status ConvertArgsToBuffers(bool jax_enable_x64, xla::PyClient& pyclient,
   }
   CHECK(data_device);
   arguments.signature.device = data_device;
-  xla::PjRtClient* pjrt_client = data_device->client();
 
   for (py::handle arg : arguments.flat_dynamic_args) {
-    bool is_py_buffer = py::isinstance<PyBuffer>(arg);
-    if (is_py_buffer || arg.get_type().is(device_array)) {
-      PyBuffer* buffer;
-      if (is_py_buffer) {
-        // PyBuffer necessarily has a trivial LazyExpr, no need to check it.
-        buffer = py::cast<xla::PyBuffer*>(arg);
-      } else {
-        if (!IsTrivialLazyExpr(py::getattr(arg, "_lazy_expr"))) {
-          return InvalidArgument(
-              "Non-trivial lazy expression not supported in C++. "
-              "Falling back to Python.");
-        }
-        buffer = py::cast<xla::PyBuffer*>(arg.attr("device_buffer"));
-      }
+    TF_ASSIGN_OR_RETURN(DevicePutResult on_device,
+                        DevicePut(arg, data_device, jax_enable_x64, pyclient));
 
-      if (buffer->device().contents == data_device) {
-        arg_buffers.push_back(buffer->buffer());
-      } else {
-        // source and target platforms are the same, but different device.
-        // Perform a device-to-device copy.
-        // buffers from different XLA backends are passed through the host.
-        std::unique_ptr<PjRtBuffer> copied_buffer =
-            ValueOrThrow(buffer->buffer()->CopyToDevice(data_device));
-        arg_buffers.push_back(copied_buffer.get());
-        keep_alive.emplace_back(std::move(copied_buffer));
-      }
-
-      ArgSignature sig;
-      sig.dtype = buffer->shape().element_type();
-      sig.shape.assign(buffer->shape().dimensions().begin(),
-                       buffer->shape().dimensions().end());
-      sig.weak_type = py::cast<py::bool_>(arg.attr("aval").attr("weak_type"));
-      arguments.signature.dynamic_args_signatures.push_back(std::move(sig));
-    } else if (py::isinstance<py::array>(arg)) {
-      // TODO(jblespiau): Can we improve this call? Do we need the underlying
-      // GlobalPyRefManager() and co?
-      py::array numpy_array = py::cast<py::array>(arg);
-      if (IsFloat0(numpy_array)) {
-        return InvalidArgument(
-            "float0 numpy arrays not supported in C++. "
-            "It will fallback to Python.");
-      }
-      // If jax_enable_x64 is not set, we need to coerce 32 bits types.
-      // Note that this is calling back to Python!
-      if (!jax_enable_x64) {
-        const py::dtype* to_dtype = DtypeTo32BitDtype(numpy_array.dtype());
-        if (to_dtype) {
-          numpy_array = np_array(numpy_array, *to_dtype);
-        }
-      }
-      std::unique_ptr<xla::PyBuffer> buffer =
-          ValueOrThrow(pyclient.BufferFromPyval(
-              numpy_array, data_device,
-              /*force_copy=*/false, /*host_buffer_semantics=*/
-              xla::PjRtClient::HostBufferSemantics::kZeroCopy));
-      arg_buffers.push_back(buffer->buffer());
-
-      ArgSignature sig;
-      sig.dtype = buffer->shape().element_type();
-      sig.weak_type = false;
-      sig.shape.assign(buffer->shape().dimensions().begin(),
-                       buffer->shape().dimensions().end());
-      arguments.signature.dynamic_args_signatures.push_back(sig);
-
-      keep_alive.emplace_back(std::move(buffer));
-    } else {
-      StatusOr<std::unique_ptr<xla::PjRtBuffer>> buffer =
-          ScalarToBuffer(arg, jax_enable_x64, pjrt_client, data_device);
-      if (!buffer.ok()) {
-        return buffer.status();
-      }
-      arg_buffers.push_back(buffer.ValueOrDie().get());
-      ArgSignature sig;
-      sig.dtype = buffer.ValueOrDie()->on_host_shape().element_type();
-      sig.weak_type = true;
-      arguments.signature.dynamic_args_signatures.push_back(sig);
-
-      keep_alive.emplace_back(std::move(buffer).ValueOrDie());
+    PjRtBuffer* buffer = on_device.buffer;
+    arg_buffers.push_back(buffer);
+    if (on_device.owned_buffer) {
+      keep_alive.emplace_back(std::move(on_device.owned_buffer));
     }
+
+    ArgSignature sig(buffer->on_host_shape().element_type(),
+                     buffer->on_host_shape().dimensions(), on_device.weak_type);
+    arguments.signature.dynamic_args_signatures.push_back(std::move(sig));
   }
   return Status::OK();
 }
diff --git a/tensorflow/compiler/xla/python/jax_jit.h b/tensorflow/compiler/xla/python/jax_jit.h
index 8cb81fdfbce..c61522ff686 100644
--- a/tensorflow/compiler/xla/python/jax_jit.h
+++ b/tensorflow/compiler/xla/python/jax_jit.h
@@ -20,6 +20,7 @@ limitations under the License.
 #include "absl/strings/str_join.h"
 #include "pybind11/pybind11.h"
 #include "tensorflow/compiler/xla/pjrt/pjrt_client.h"
+#include "tensorflow/compiler/xla/python/py_client.h"
 #include "tensorflow/compiler/xla/python/pytree.h"
 #include "tensorflow/compiler/xla/xla_data.pb.h"
 
@@ -27,12 +28,15 @@ namespace xla {
 
 // Describes the abstract shape and dtype of an argument.
 struct ArgSignature {
+  ArgSignature(PrimitiveType dtype, absl::Span<const int64> shape,
+               bool weak_type)
+      : dtype(dtype), shape(shape.begin(), shape.end()), weak_type(weak_type) {}
   // This is the XLA dtype of the object.
-  xla::PrimitiveType dtype;
+  const PrimitiveType dtype;
+  const absl::InlinedVector<int64, 4> shape;
   // JAX arguments can be of weak type, if and only if they are Python scalars
   // or `DeviceArray` values such that `aval.weak_type` is true.
-  bool weak_type;
-  absl::InlinedVector<int64, 4> shape;
+  const bool weak_type;
   bool operator==(const ArgSignature& other) const {
     return std::tie(dtype, weak_type, shape) ==
            std::tie(other.dtype, other.weak_type, other.shape);
@@ -105,6 +109,31 @@ H AbslHashValue(H h, const CallSignature::KwargEntry& kw) {
 template <typename H>
 H AbslHashValue(H h, const CallSignature& s);
 
+struct DevicePutResult {
+  explicit DevicePutResult(PjRtBuffer* b, bool weak_type)
+      : buffer(b), weak_type(weak_type), owned_buffer(nullptr) {}
+  DevicePutResult(std::unique_ptr<PjRtBuffer> new_buffer, bool weak_type)
+      : buffer(new_buffer.get()),
+        weak_type(weak_type),
+        owned_buffer(std::move(new_buffer)) {}
+
+  PjRtBuffer* buffer;
+  bool weak_type;
+  std::unique_ptr<PjRtBuffer> owned_buffer;
+};
+
+// Moves a device-like object to be on device.
+// - If the object is already on device, `owned_buffer` will be nullptr.
+// - If it's not, a new buffer will be created and returned using
+//   `owned_buffer`.
+// In all cases, `buffer` will point to the already existing or newly created
+// buffer.
+// If `obj` is not convertible to a `PjRtBuffer` from C++, an error will be
+// returned; float0 dtype and `_DeviceArray` with non-trivial LazyExpr are not
+// supported yet.
+StatusOr<DevicePutResult> DevicePut(pybind11::handle obj, PjRtDevice* to_device,
+                                    bool jax_enable_x64, PyClient& pyclient);
+
 // The function to call in `xla.cc` to add the bindings for this module.
 void BuildJaxjitSubmodule(pybind11::module& m);
 
diff --git a/tensorflow/compiler/xla/python/py_client.cc b/tensorflow/compiler/xla/python/py_client.cc
index a5638f973df..7d384724b7a 100644
--- a/tensorflow/compiler/xla/python/py_client.cc
+++ b/tensorflow/compiler/xla/python/py_client.cc
@@ -88,7 +88,7 @@ PyClient::GetDefaultDeviceAssignment1D(int num_replicas) {
   return result;
 }
 
-StatusOr<std::unique_ptr<PyBuffer>> PyClient::BufferFromPyval(
+StatusOr<std::unique_ptr<PjRtBuffer>> PyClient::PjRtBufferFromPyval(
     const pybind11::object& argument, PjRtDevice* device, bool force_copy,
     PjRtClient::HostBufferSemantics host_buffer_semantics) {
   if (device == nullptr) {
@@ -120,6 +120,15 @@ StatusOr<std::unique_ptr<PyBuffer>> PyClient::BufferFromPyval(
                                     c->buf_ptr, c->shape, host_buffer_semantics,
                                     std::move(py_buffer_ref), device));
   }
+  return buffer;
+}
+StatusOr<std::unique_ptr<PyBuffer>> PyClient::BufferFromPyval(
+    const pybind11::object& argument, PjRtDevice* device, bool force_copy,
+    PjRtClient::HostBufferSemantics host_buffer_semantics) {
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<PjRtBuffer> buffer,
+      PjRtBufferFromPyval(argument, device, force_copy, host_buffer_semantics));
+
   auto traceback = Traceback::Get();
   return std::make_unique<PyBuffer>(shared_from_this(), std::move(buffer),
                                     std::move(traceback));
diff --git a/tensorflow/compiler/xla/python/py_client.h b/tensorflow/compiler/xla/python/py_client.h
index 158171b83c7..f2690fdf6c4 100644
--- a/tensorflow/compiler/xla/python/py_client.h
+++ b/tensorflow/compiler/xla/python/py_client.h
@@ -123,6 +123,9 @@ class PyClient : public std::enable_shared_from_this<PyClient> {
     return pjrt_client_->CreateHostToDeviceChannelHandle();
   }
 
+  StatusOr<std::unique_ptr<PjRtBuffer>> PjRtBufferFromPyval(
+      const pybind11::object& argument, PjRtDevice* device, bool force_copy,
+      PjRtClient::HostBufferSemantics host_buffer_semantics);
   StatusOr<std::unique_ptr<PyBuffer>> BufferFromPyval(
       const pybind11::object& argument, PjRtDevice* device, bool force_copy,
       PjRtClient::HostBufferSemantics host_buffer_semantics);

From 834cadde2ee5f7e935e05f8e836acde3337b3f98 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <jurahul@google.com>
Date: Mon, 14 Dec 2020 09:10:50 -0800
Subject: [PATCH 41/60] [XLA:GPU] Eliminate support for
 Convolution/GEMM/Cholesky from ThunkEmitter

- Since these have been migrated to MLIR, we don't need to support them in the
  ThunkEmitter

PiperOrigin-RevId: 347397940
Change-Id: I31c088008b1f4fdad82ead96313fb92324c206fe
---
 .../compiler/xla/service/gpu/thunk_emitter.cc | 76 -------------------
 1 file changed, 76 deletions(-)

diff --git a/tensorflow/compiler/xla/service/gpu/thunk_emitter.cc b/tensorflow/compiler/xla/service/gpu/thunk_emitter.cc
index 058aad76777..215fdb56ce8 100644
--- a/tensorflow/compiler/xla/service/gpu/thunk_emitter.cc
+++ b/tensorflow/compiler/xla/service/gpu/thunk_emitter.cc
@@ -298,83 +298,7 @@ Status ThunkEmitter::HandleCustomCall(HloInstruction* custom_call) {
     return Status::OK();
   }
 
-  if (IsCustomCallToDnnConvolution(*custom_call)) {
-    std::vector<BufferAllocation::Slice> operand_slices;
-    operand_slices.reserve(custom_call->operand_count());
-    for (const auto* operand : custom_call->operands()) {
-      operand_slices.push_back(GetAllocationSlice(*operand));
-    }
-    auto conv_result_slice = GetAllocationSlice(*custom_call, {0});
-    auto scratch_slice = GetAllocationSlice(*custom_call, {1});
-
-    // Assert that the tuple slice is not used by anyone directly. That is, all
-    // users of the tuple output are get-tuple-element. Also assert that the
-    // second element of the tuple (the scratch buffer) is not used by anyone.
-    for (const HloInstruction* user : custom_call->users()) {
-      TF_RET_CHECK(user->opcode() == HloOpcode::kGetTupleElement &&
-                   user->tuple_index() == 0);
-    }
-
-    TF_ASSIGN_OR_RETURN(
-        GpuConvConfig config,
-        GetGpuConvConfig(Cast<HloCustomCallInstruction>(custom_call)));
-    AddThunkToThunkSequence(absl::make_unique<ConvolutionThunk>(
-        context_->GetThunkInfo(custom_call), std::move(config),
-        std::move(operand_slices), conv_result_slice, scratch_slice));
-    return Status::OK();
-  }
-
-  if (IsCublasGemm(*custom_call)) {
-    AddThunkToThunkSequence(BuildGemmThunk(custom_call));
-    return Status::OK();
-  }
-
 #if (defined(GOOGLE_CUDA) && GOOGLE_CUDA)
-  if (custom_call->custom_call_target() == kCusolverCholeskyCallTarget) {
-    TF_ASSIGN_OR_RETURN(CholeskyOptions options,
-                        custom_call->backend_config<CholeskyOptions>());
-
-    const Shape& shape = custom_call->operand(0)->shape();
-    int ndim = shape.dimensions_size();
-    CHECK_GE(ndim, 2);
-    int64 n = shape.dimensions(ndim - 1);
-
-    const auto& dims = shape.dimensions();
-    int64 batch_size = std::accumulate(dims.begin(), dims.end() - 2, int64{1},
-                                       [](int64 a, int64 b) { return a * b; });
-
-    auto operand_buffer = GetAllocationSlice(*custom_call->operand(0));
-
-    auto a_buffer = GetAllocationSlice(*custom_call, {0});
-    auto workspace_buffer = GetAllocationSlice(*custom_call, {1});
-    auto info_buffer = GetAllocationSlice(*custom_call, {2});
-
-    std::vector<std::unique_ptr<Thunk>> thunks;
-
-    if (operand_buffer != a_buffer) {
-      thunks.push_back(absl::make_unique<DeviceToDeviceCopyThunk>(
-          context_->GetThunkInfo(custom_call),
-          /*source_address=*/operand_buffer,
-          /*destination_buffer=*/a_buffer,
-          /*mem_size=*/ShapeUtil::ByteSizeOf(shape)));
-    }
-
-    thunks.push_back(absl::make_unique<CholeskyThunk>(
-        context_->GetThunkInfo(custom_call), options, a_buffer,
-        workspace_buffer, info_buffer,
-        custom_call->operand(0)->shape().element_type(), batch_size, n));
-
-    // Elide the sequential thunk if there's no copy.
-    if (thunks.size() == 1) {
-      AddThunkToThunkSequence(std::move(thunks[0]));
-    } else {
-      AddThunkToThunkSequence(absl::make_unique<SequentialThunk>(
-          context_->GetThunkInfo(custom_call), std::move(thunks)));
-    }
-
-    return Status::OK();
-  }
-
   if (void* call_target = CustomCallTargetRegistry::Global()->Lookup(
           custom_call->custom_call_target(), std::string(platform_name()))) {
     auto get_slices_for_instr = [&](const HloInstruction* instr) {

From a12a2aa51973686418e0c4525d8f55ed72f5aa3b Mon Sep 17 00:00:00 2001
From: Karim Nosir <karimnosseir@google.com>
Date: Mon, 14 Dec 2020 09:42:11 -0800
Subject: [PATCH 42/60] Enhance error message when model fails to convert
 because of unsupported ops that can be handled using TF Select or that require
 a custom op implementation.

PiperOrigin-RevId: 347404307
Change-Id: Ie56cfae6a72be39b50291b13550296476ab37e3a
---
 tensorflow/compiler/mlir/lite/flatbuffer_export.cc       | 9 +++++----
 .../mlir/lite/tests/mlir2flatbuffer/disable_custom.mlir  | 2 +-
 .../mlir/lite/tests/mlir2flatbuffer/disable_flex.mlir    | 2 +-
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/tensorflow/compiler/mlir/lite/flatbuffer_export.cc b/tensorflow/compiler/mlir/lite/flatbuffer_export.cc
index b9a1c4737d3..4a72e40491f 100644
--- a/tensorflow/compiler/mlir/lite/flatbuffer_export.cc
+++ b/tensorflow/compiler/mlir/lite/flatbuffer_export.cc
@@ -1759,13 +1759,14 @@ Optional<std::string> Translator::TranslateInternal() {
     std::string err;
     if (!failed_flex_ops_.empty())
       err +=
-          "Ops that can be supported by the flex runtime (enabled via setting "
-          "the -emit-select-tf-ops flag):\n" +
+          "Some ops are not supported by the native TFLite runtime, you can "
+          "enable TF kernels fallback using TF Select. See instructions: "
+          "https://www.tensorflow.org/lite/guide/ops_select" +
           failed_flex_ops_summary;
     if (!failed_custom_ops_.empty())
       err +=
-          "Ops that need custom implementation (enabled via setting the "
-          "-emit-custom-ops flag):\n" +
+          "Some ops in the model are custom ops, See instructions to implement "
+          "custom ops: https://www.tensorflow.org/lite/guide/ops_custom" +
           failed_custom_ops_summary;
 
     auto& failed_region = named_regions[first_failed_func];
diff --git a/tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/disable_custom.mlir b/tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/disable_custom.mlir
index 1be7db1d69c..913e128e697 100644
--- a/tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/disable_custom.mlir
+++ b/tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/disable_custom.mlir
@@ -2,7 +2,7 @@
 
 // CHECK: error: 'tf.MyCustomOp' op is neither a custom op nor a flex op
 // CHECK: error: failed while converting: 'main'
-// CHECK: Ops that need custom implementation (enabled via setting the -emit-custom-ops flag):
+// CHECK: Some ops in the model are custom ops, See instructions to implement
 // CHECK: tf.MyCustomOp {name = "MyCustomOp"}
 
 func @main(tensor<4xf32>) -> tensor<4xf32> {
diff --git a/tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/disable_flex.mlir b/tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/disable_flex.mlir
index e767dc0e686..8e36c5266ce 100644
--- a/tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/disable_flex.mlir
+++ b/tensorflow/compiler/mlir/lite/tests/mlir2flatbuffer/disable_flex.mlir
@@ -2,7 +2,7 @@
 
 // CHECK: error: 'tf.Div' op is neither a custom op nor a flex op
 // CHECK: error: failed while converting: 'main'
-// CHECK: Ops that can be supported by the flex runtime (enabled via setting the -emit-select-tf-ops flag):
+// CHECK: Some ops are not supported by the native TFLite runtime
 // CHECK: tf.Div {name = "div"}
 
 func @main(tensor<4xf32>) -> tensor<4xf32> {

From 3acbbe9df69eb707169445fff620693722d067e9 Mon Sep 17 00:00:00 2001
From: Michael Banfield <micban@google.com>
Date: Mon, 14 Dec 2020 10:01:39 -0800
Subject: [PATCH 43/60] Add initial TpuTracer.

PiperOrigin-RevId: 347408830
Change-Id: I729103e841c33d155e7e1e9dafac6c20707f877e
---
 tensorflow/core/profiler/internal/tpu/BUILD   |  29 +++++
 .../core/profiler/internal/tpu/tpu_tracer.cc  | 120 ++++++++++++++++++
 tensorflow/core/tpu/BUILD                     |   2 +
 tensorflow/core/tpu/tpu_library_init_fns.inc  |   6 +
 tensorflow/core/tpu/tpu_ops_c_api.h           |  24 ++++
 tensorflow/stream_executor/tpu/BUILD          |   1 +
 6 files changed, 182 insertions(+)
 create mode 100644 tensorflow/core/profiler/internal/tpu/BUILD
 create mode 100644 tensorflow/core/profiler/internal/tpu/tpu_tracer.cc

diff --git a/tensorflow/core/profiler/internal/tpu/BUILD b/tensorflow/core/profiler/internal/tpu/BUILD
new file mode 100644
index 00000000000..e76e7e985c5
--- /dev/null
+++ b/tensorflow/core/profiler/internal/tpu/BUILD
@@ -0,0 +1,29 @@
+load("//tensorflow/core/platform:rules_cc.bzl", "cc_library")
+load("//tensorflow/core/profiler/builds:build_config.bzl", "tf_profiler_copts")
+
+package(
+    default_visibility = ["//tensorflow:internal"],
+    licenses = ["notice"],  # Apache 2.0
+)
+
+cc_library(
+    name = "tpu_tracer",
+    srcs = ["tpu_tracer.cc"],
+    copts = tf_profiler_copts(),
+    deps = [
+        "//tensorflow/core:lib",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core/profiler:profiler_options_proto_cc",
+        "//tensorflow/core/profiler/lib:profiler_factory",
+        "//tensorflow/core/profiler/lib:profiler_interface",
+        "//tensorflow/core/profiler/protobuf:xplane_proto_cc",
+        "//tensorflow/core/profiler/utils:time_utils",
+        "//tensorflow/core/profiler/utils:xplane_schema",
+        "//tensorflow/core/profiler/utils:xplane_utils",
+        "//tensorflow/core/tpu:tpu_api",
+        "//tensorflow/core/tpu:tpu_ops_c_api_hdrs",
+        "//tensorflow/stream_executor/tpu:status_helper",
+        "@com_google_absl//absl/strings",
+    ],
+    alwayslink = True,
+)
diff --git a/tensorflow/core/profiler/internal/tpu/tpu_tracer.cc b/tensorflow/core/profiler/internal/tpu/tpu_tracer.cc
new file mode 100644
index 00000000000..e4cf245413d
--- /dev/null
+++ b/tensorflow/core/profiler/internal/tpu/tpu_tracer.cc
@@ -0,0 +1,120 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "absl/strings/str_split.h"
+#include "absl/strings/string_view.h"
+#include "tensorflow/core/framework/step_stats.pb.h"
+#include "tensorflow/core/platform/errors.h"
+#include "tensorflow/core/platform/status.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/profiler/lib/profiler_factory.h"
+#include "tensorflow/core/profiler/lib/profiler_interface.h"
+#include "tensorflow/core/profiler/profiler_options.pb.h"
+#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
+#include "tensorflow/core/profiler/utils/xplane_schema.h"
+#include "tensorflow/core/tpu/tpu_api.h"
+#include "tensorflow/core/tpu/tpu_ops_c_api.h"
+#include "tensorflow/stream_executor/tpu/status_helper.h"
+
+namespace tensorflow {
+namespace profiler {
+namespace {
+
+// Tpu implementation of ProfilerInterface.
+//
+// Thread-safety: This class is go/thread-compatible.
+class TpuTracer : public ProfilerInterface {
+ public:
+  explicit TpuTracer();
+  ~TpuTracer() override;
+
+  Status Start() override;
+
+  Status Stop() override;
+
+  // Unsupported.
+  Status CollectData(RunMetadata* run_metadata) override;
+
+  Status CollectData(XSpace* space) override;
+
+ private:
+  TpuProfiler* tpu_profiler_;
+};
+
+TpuTracer::TpuTracer() {
+  tpu_profiler_ = tpu::OpsApiFn()->TpuProfiler_CreateFn();
+}
+
+TpuTracer::~TpuTracer() { tpu::OpsApiFn()->TpuProfiler_FreeFn(tpu_profiler_); }
+
+Status TpuTracer::Start() {
+  StatusHelper status;
+  tpu::OpsApiFn()->TpuProfiler_StartFn(tpu_profiler_, status.c_status);
+  if (!status.ok()) {
+    VLOG(1) << "Run Start failed.";
+    return status.status();
+  }
+  return Status::OK();
+}
+
+Status TpuTracer::Stop() {
+  StatusHelper status;
+  tpu::OpsApiFn()->TpuProfiler_StopFn(tpu_profiler_, status.c_status);
+  if (!status.ok()) {
+    VLOG(1) << "Run Stop failed.";
+    return status.status();
+  }
+  return Status::OK();
+}
+
+Status TpuTracer::CollectData(RunMetadata* run_metadata) {
+  // Unsupported
+  return Status::OK();
+}
+
+Status TpuTracer::CollectData(XSpace* space) {
+  StatusHelper status;
+  tpu::OpsApiFn()->TpuProfiler_CollectDataFn(tpu_profiler_, status.c_status,
+                                             space);
+  if (!status.ok()) {
+    VLOG(1) << "Run CollectData failed.";
+    return status.status();
+  }
+  return Status::OK();
+}
+
+}  // namespace
+
+// Not in anonymous namespace for testing purposes.
+std::unique_ptr<ProfilerInterface> CreateTpuTracer(
+    const ProfileOptions& options) {
+  if (options.device_type() != ProfileOptions::TPU &&
+      options.device_type() != ProfileOptions::UNSPECIFIED) {
+    return nullptr;
+  }
+  return absl::make_unique<TpuTracer>();
+}
+
+auto register_host_tracer_factory = [] {
+  RegisterProfilerFactory(&CreateTpuTracer);
+  return 0;
+}();
+
+}  // namespace profiler
+}  // namespace tensorflow
diff --git a/tensorflow/core/tpu/BUILD b/tensorflow/core/tpu/BUILD
index 1235821e80c..cd288bfce00 100644
--- a/tensorflow/core/tpu/BUILD
+++ b/tensorflow/core/tpu/BUILD
@@ -116,6 +116,7 @@ cc_library(
     name = "tpu_api",
     srcs = ["tpu_api.cc"],
     hdrs = ["tpu_api.h"],
+    visibility = ["//visibility:public"],
     deps = [
         ":libtftpu_header",
         ":tpu_executor_api",
@@ -344,6 +345,7 @@ cc_library(
     visibility = ["//visibility:public"],
     deps = [
         ":libtftpu_header",
+        "//tensorflow/core/profiler/protobuf:xplane_proto_cc",
         "//tensorflow/stream_executor/tpu:c_api_decl",
         "//tensorflow/stream_executor/tpu:proto_helper",
     ],
diff --git a/tensorflow/core/tpu/tpu_library_init_fns.inc b/tensorflow/core/tpu/tpu_library_init_fns.inc
index ef4d7ba55ed..0b984fa2a75 100644
--- a/tensorflow/core/tpu/tpu_library_init_fns.inc
+++ b/tensorflow/core/tpu/tpu_library_init_fns.inc
@@ -70,6 +70,12 @@ tensorflow::Status SetTpuOpsStructFns(void* library_handle) {
   TFTPU_SET_FN(ops_api_fn, TpuCompile_CreateCompilationCacheKey);
   TFTPU_SET_FN(ops_api_fn, TpuCompile_DestroyCompilationCacheKey);
   TFTPU_SET_FN(ops_api_fn, TpuCompile_CreateGuaranteedConstFingerprint);
+  
+  TFTPU_SET_FN(ops_api_fn, TpuProfiler_Create);
+  TFTPU_SET_FN(ops_api_fn, TpuProfiler_Free);
+  TFTPU_SET_FN(ops_api_fn, TpuProfiler_Start);
+  TFTPU_SET_FN(ops_api_fn, TpuProfiler_Stop);
+  TFTPU_SET_FN(ops_api_fn, TpuProfiler_CollectData);
 
   return tensorflow::Status::OK();
 }
diff --git a/tensorflow/core/tpu/tpu_ops_c_api.h b/tensorflow/core/tpu/tpu_ops_c_api.h
index 77e5ddb406c..f361110f975 100644
--- a/tensorflow/core/tpu/tpu_ops_c_api.h
+++ b/tensorflow/core/tpu/tpu_ops_c_api.h
@@ -19,6 +19,7 @@ limitations under the License.
 
 #include <cstdint>
 
+#include "tensorflow/core/profiler/protobuf/xplane.pb.h"
 #include "tensorflow/core/tpu/libtftpu.h"
 #include "tensorflow/stream_executor/tpu/c_api_decl.h"
 #include "tensorflow/stream_executor/tpu/proto_helper.h"
@@ -53,6 +54,8 @@ struct HostComputeMetadataSerializedProto {
 
 typedef struct XLA_TpuMeshState XLA_TpuMeshState;
 
+typedef struct TpuProfiler TpuProfiler;
+
 typedef struct XLA_DeviceAssignment {
   const char* bytes;
   size_t size;
@@ -103,6 +106,21 @@ TFTPU_CAPI_EXPORT void TpuCompile_XrtCompileAndBuild(
     TpuSerializedProto xrt_computation, const XLA_TpuMeshState* mesh_state,
     XLA_TpuProgram** tpu_programs[], size_t* count, TF_Status* status);
 
+// Creates a new TPU profiler object.
+TFTPU_CAPI_EXPORT TpuProfiler* TpuProfiler_Create();
+
+TFTPU_CAPI_EXPORT TpuProfiler* TpuProfiler_Free(TpuProfiler* tpu_profiler);
+
+TFTPU_CAPI_EXPORT void TpuProfiler_Start(TpuProfiler* tpu_profiler,
+                                         TF_Status* status);
+
+TFTPU_CAPI_EXPORT void TpuProfiler_Stop(TpuProfiler* tpu_profiler,
+                                        TF_Status* status);
+
+TFTPU_CAPI_EXPORT void TpuProfiler_CollectData(
+    TpuProfiler* tpu_profiler, TF_Status* status,
+    tensorflow::profiler::XSpace* space);
+
 // Creates a new TPU mesh state object.
 TFTPU_CAPI_EXPORT XLA_TpuMeshState* TpuMeshState_Create();
 
@@ -397,6 +415,12 @@ struct TfTpu_OpsApiFn {
   TFTPU_ADD_FN_IN_STRUCT(TpuMeshState_Free);
   TFTPU_ADD_FN_IN_STRUCT(TpuMeshState_MeshCommonState);
 
+  TFTPU_ADD_FN_IN_STRUCT(TpuProfiler_Create);
+  TFTPU_ADD_FN_IN_STRUCT(TpuProfiler_Free);
+  TFTPU_ADD_FN_IN_STRUCT(TpuProfiler_Start);
+  TFTPU_ADD_FN_IN_STRUCT(TpuProfiler_Stop);
+  TFTPU_ADD_FN_IN_STRUCT(TpuProfiler_CollectData);
+
   TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_LoadProgramAndEnqueueToStream);
   TFTPU_ADD_FN_IN_STRUCT(HardwareLayout_HostShapeToDeviceShape);
   TFTPU_ADD_FN_IN_STRUCT(HardwareLayout_ShapeSize);
diff --git a/tensorflow/stream_executor/tpu/BUILD b/tensorflow/stream_executor/tpu/BUILD
index b8f7582e0a8..1a7e130f9ac 100644
--- a/tensorflow/stream_executor/tpu/BUILD
+++ b/tensorflow/stream_executor/tpu/BUILD
@@ -5,6 +5,7 @@ load("//tensorflow/core/platform:rules_cc.bzl", "cc_library")
 package(
     default_visibility = [
         "//learning/brain/experimental/dtensor:__subpackages__",
+        "//tensorflow/core/profiler/internal/tpu:__subpackages__",
         "//tensorflow/core/tpu:__subpackages__",
     ],
     licenses = ["notice"],  # Apache 2.0

From c7a83701c595f4343a300cb849a2dcb7aa6fdc31 Mon Sep 17 00:00:00 2001
From: Kibeom Kim <kkb@google.com>
Date: Mon, 14 Dec 2020 10:11:34 -0800
Subject: [PATCH 44/60] Add penpornk@ as a /tensorflow/core/kernels/mkl/
 reviewer

PiperOrigin-RevId: 347411463
Change-Id: Iff5198a1e1f646b10113e98ea625e90291e89c62
---
 CODEOWNERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CODEOWNERS b/CODEOWNERS
index 9de1922a262..3b0565b3e4a 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -3,6 +3,8 @@
 /tensorflow/c/eager @qqfish @kkimdev
 /tensorflow/core/common_runtime/eager @qqfish @kkimdev
 /tenosrflow/core/debug @caisq
+/tensorflow/core/kernels/mkl/ @penpornk
+/tensorflow/core/kernels/sparse/ @penpornk
 /tensorflow/core/nccl/ @azaks2 @chsigg
 /tensorflow/core/platform/windows/ @mihaimaruseac
 /tensorflow/lite/experimental/micro @petewarden @advaitjain

From 26f038dcefb788b08990045126111d4305463744 Mon Sep 17 00:00:00 2001
From: Advait Jain <advaitjain@users.noreply.github.com>
Date: Mon, 14 Dec 2020 10:23:32 -0800
Subject: [PATCH 45/60] clang-format and fix the build due to missing include.

---
 tensorflow/lite/micro/kernels/quantize.cc        | 3 ++-
 tensorflow/lite/micro/kernels/quantize.h         | 2 +-
 tensorflow/lite/micro/kernels/quantize_common.cc | 3 +--
 tensorflow/lite/micro/kernels/xtensa/quantize.cc | 3 ++-
 4 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/tensorflow/lite/micro/kernels/quantize.cc b/tensorflow/lite/micro/kernels/quantize.cc
index 5f71da92029..f62addbb776 100644
--- a/tensorflow/lite/micro/kernels/quantize.cc
+++ b/tensorflow/lite/micro/kernels/quantize.cc
@@ -13,12 +13,13 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "tensorflow/lite/micro/kernels/quantize.h"
+
 #include "tensorflow/lite/c/common.h"
 #include "tensorflow/lite/kernels/internal/quantization_util.h"
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/micro/kernels/kernel_util.h"
-#include "tensorflow/lite/micro/kernels/quantize.h"
 #include "tensorflow/lite/micro/micro_utils.h"
 
 namespace tflite {
diff --git a/tensorflow/lite/micro/kernels/quantize.h b/tensorflow/lite/micro/kernels/quantize.h
index 517697e16c7..aefe62446a8 100644
--- a/tensorflow/lite/micro/kernels/quantize.h
+++ b/tensorflow/lite/micro/kernels/quantize.h
@@ -15,8 +15,8 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_
 #define TENSORFLOW_LITE_MICRO_KERNELS_QUANTIZE_H_
 
-#include "tensorflow/lite/c/builtin_op_data.h"
 #include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/kernels/internal/types.h"
 
 namespace tflite {
 
diff --git a/tensorflow/lite/micro/kernels/quantize_common.cc b/tensorflow/lite/micro/kernels/quantize_common.cc
index 6cabed6c704..2c4a8d2c604 100644
--- a/tensorflow/lite/micro/kernels/quantize_common.cc
+++ b/tensorflow/lite/micro/kernels/quantize_common.cc
@@ -27,8 +27,7 @@ namespace tflite {
 
 TfLiteStatus EvalQuantizeReference(TfLiteContext* context, TfLiteNode* node) {
   TFLITE_DCHECK(node->user_data != nullptr);
-  auto* data =
-      static_cast<OpDataQuantizeReference*>(node->user_data);
+  auto* data = static_cast<OpDataQuantizeReference*>(node->user_data);
 
   const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
   TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
diff --git a/tensorflow/lite/micro/kernels/xtensa/quantize.cc b/tensorflow/lite/micro/kernels/xtensa/quantize.cc
index b552131e698..7a93492c7cf 100644
--- a/tensorflow/lite/micro/kernels/xtensa/quantize.cc
+++ b/tensorflow/lite/micro/kernels/xtensa/quantize.cc
@@ -13,11 +13,12 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include "tensorflow/lite/kernels/internal/reference/quantize.h"
+
 #include <xtensa/tie/xt_hifi2.h>
 
 #include "tensorflow/lite/c/common.h"
 #include "tensorflow/lite/kernels/internal/quantization_util.h"
-#include "tensorflow/lite/kernels/internal/reference/quantize.h"
 #include "tensorflow/lite/kernels/internal/reference/requantize.h"
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"

From 6433b89b4969af4d2dd6eb0531e0f0507dfaffb4 Mon Sep 17 00:00:00 2001
From: Amit Patankar <amitpatankar@google.com>
Date: Mon, 14 Dec 2020 10:24:30 -0800
Subject: [PATCH 46/60] Create BUILD files and corresponding targets for
 `tensorflow/core/platform/profile_utils/BUILD`.

PiperOrigin-RevId: 347414826
Change-Id: I5995e5959d4040d5fc1ea995c7abb4ae7408ed71
---
 tensorflow/core/platform/BUILD               | 58 +++++++------------
 tensorflow/core/platform/default/BUILD       |  7 ++-
 tensorflow/core/platform/profile_utils/BUILD | 60 ++++++++++++++++++++
 3 files changed, 84 insertions(+), 41 deletions(-)
 create mode 100644 tensorflow/core/platform/profile_utils/BUILD

diff --git a/tensorflow/core/platform/BUILD b/tensorflow/core/platform/BUILD
index 2ce21ef554c..a680ea69bda 100644
--- a/tensorflow/core/platform/BUILD
+++ b/tensorflow/core/platform/BUILD
@@ -94,10 +94,6 @@ exports_files(
         "mutex.h",
         "net.h",
         "numa.h",
-        "profile_utils/android_armv7a_cpu_utils_helper.h",
-        "profile_utils/cpu_utils.cc",
-        "profile_utils/cpu_utils.h",
-        "profile_utils/i_cpu_utils_helper.h",
         "ram_file_system.h",
         "resource_loader.h",
         "resource.h",
@@ -985,24 +981,9 @@ cc_library(
     alwayslink = 1,
 )
 
-cc_library(
+alias(
     name = "profile_utils_cpu_utils",
-    srcs = [
-        "profile_utils/android_armv7a_cpu_utils_helper.h",
-        "profile_utils/cpu_utils.cc",
-        "profile_utils/i_cpu_utils_helper.h",
-    ],
-    hdrs = [
-        "profile_utils/cpu_utils.h",
-    ],
-    copts = tf_copts(),
-    deps = [
-        ":logging",
-        ":macros",
-        ":types",
-        "@com_google_absl//absl/base",
-    ],
-    alwayslink = 1,
+    actual = "//tensorflow/core/platform/profile_utils:profile_utils_cpu_utils",
 )
 
 filegroup(
@@ -1021,13 +1002,13 @@ tf_cc_tests(
         "mutex_test.cc",
         "net_test.cc",
         "port_test.cc",
-        "profile_utils/cpu_utils_test.cc",
         "scanner_test.cc",
         "str_util_test.cc",
         "strcat_test.cc",
         "stringpiece_test.cc",
         "stringprintf_test.cc",
         "vmodule_benchmark_test.cc",
+        "//tensorflow/core/platform/profile_utils:cpu_utils_test.cc",
     ],
     create_named_test_suite = True,
     deps = [
@@ -1370,10 +1351,10 @@ filegroup(
         "numa.h",
         "path.h",
         "prefetch.h",
-        "profile_utils/android_armv7a_cpu_utils_helper.h",
-        "profile_utils/clock_cycle_profiler.h",
-        "profile_utils/cpu_utils.h",
-        "profile_utils/i_cpu_utils_helper.h",
+        "//tensorflow/core/platform/profile_utils:android_armv7a_cpu_utils_helper.h",
+        "//tensorflow/core/platform/profile_utils:clock_cycle_profiler.h",
+        "//tensorflow/core/platform/profile_utils:cpu_utils.h",
+        "//tensorflow/core/platform/profile_utils:i_cpu_utils_helper.h",
         "protobuf.h",
         "ram_file_system.h",
         "random.h",
@@ -1661,11 +1642,11 @@ filegroup(
         "platform_strings.cc",
         "platform_strings.h",
         "platform_strings_computed.h",
-        "profile_utils/android_armv7a_cpu_utils_helper.cc",
-        "profile_utils/android_armv7a_cpu_utils_helper.h",
-        "profile_utils/cpu_utils.cc",
-        "profile_utils/cpu_utils.h",
-        "profile_utils/i_cpu_utils_helper.h",
+        "//tensorflow/core/platform/profile_utils:android_armv7a_cpu_utils_helper.cc",
+        "//tensorflow/core/platform/profile_utils:android_armv7a_cpu_utils_helper.h",
+        "//tensorflow/core/platform/profile_utils:cpu_utils.cc",
+        "//tensorflow/core/platform/profile_utils:cpu_utils.h",
+        "//tensorflow/core/platform/profile_utils:i_cpu_utils_helper.h",
         "protobuf_internal.h",
         "random.cc",
         "random.h",
@@ -1683,7 +1664,6 @@ filegroup(
     srcs = glob(
         [
             "*.h",
-            "profile_utils/**/*.h",
         ],
         exclude = [
             "dynamic_annotations.h",
@@ -1700,16 +1680,18 @@ filegroup(
             "**/rocm.h",
             "**/stream_executor.h",
         ],
-    ),
+    ) + [
+        "//tensorflow/core/platform/profile_utils:android_armv7a_cpu_utils_helper.h",
+        "//tensorflow/core/platform/profile_utils:cpu_utils.h",
+        "//tensorflow/core/platform/profile_utils:i_cpu_utils_helper.h",
+        "//tensorflow/core/platform/profile_utils:clock_cycle_profiler.h",
+    ],
     visibility = ["//tensorflow/core:__pkg__"],
 )
 
-filegroup(
+alias(
     name = "legacy_lib_internal_srcs",
-    srcs = [
-        "profile_utils/android_armv7a_cpu_utils_helper.cc",
-        "profile_utils/clock_cycle_profiler.cc",
-    ],
+    actual = "//tensorflow/core/platform/profile_utils:legacy_lib_internal_srcs",
     visibility = ["//tensorflow/core:__pkg__"],
 )
 
diff --git a/tensorflow/core/platform/default/BUILD b/tensorflow/core/platform/default/BUILD
index 09ce2f2e77f..7c1148bd0c4 100644
--- a/tensorflow/core/platform/default/BUILD
+++ b/tensorflow/core/platform/default/BUILD
@@ -283,8 +283,8 @@ cc_library(
         "//tensorflow/core/platform:init_main.h",
         "//tensorflow/core/platform:mem.h",
         "//tensorflow/core/platform:numa.h",
-        "//tensorflow/core/platform:profile_utils/cpu_utils.h",
         "//tensorflow/core/platform:snappy.h",
+        "//tensorflow/core/platform/profile_utils:cpu_utils.h",
     ],
     copts = tf_copts(),
     defines = ["TF_USE_SNAPPY"] + select({
@@ -292,6 +292,7 @@ cc_library(
         "//tensorflow:with_numa_support": ["TENSORFLOW_USE_NUMA"],
         "//conditions:default": [],
     }),
+    features = ["-layering_check"],
     tags = [
         "manual",
         "no_oss",
@@ -546,8 +547,8 @@ filegroup(
         "resource.cc",
         "stacktrace.h",
         "tracing_impl.h",
-        "//tensorflow/core/platform:profile_utils/cpu_utils.h",
-        "//tensorflow/core/platform:profile_utils/i_cpu_utils_helper.h",
+        "//tensorflow/core/platform/profile_utils:cpu_utils.h",
+        "//tensorflow/core/platform/profile_utils:i_cpu_utils_helper.h",
     ],
     visibility = ["//tensorflow/core/platform:__pkg__"],
 )
diff --git a/tensorflow/core/platform/profile_utils/BUILD b/tensorflow/core/platform/profile_utils/BUILD
new file mode 100644
index 00000000000..5d900e395cf
--- /dev/null
+++ b/tensorflow/core/platform/profile_utils/BUILD
@@ -0,0 +1,60 @@
+# Description:
+# profile_utils targets.
+
+load("//tensorflow:tensorflow.bzl", "filegroup")
+load(
+    "//tensorflow/core/platform:rules_cc.bzl",
+    "cc_library",
+)
+load(
+    "//tensorflow:tensorflow.bzl",
+    "tf_copts",  # @unused
+)
+
+package(
+    default_visibility = [
+        "//tensorflow/core:__pkg__",
+        "//tensorflow/core/default:__pkg__",
+        "//tensorflow/core/platform:__pkg__",
+    ],
+    licenses = ["notice"],  # Apache 2.0
+)
+
+exports_files(srcs = [
+    "android_armv7a_cpu_utils_helper.cc",
+    "android_armv7a_cpu_utils_helper.h",
+    "clock_cycle_profiler.h",
+    "cpu_utils.cc",
+    "cpu_utils.h",
+    "cpu_utils_test.cc",
+    "i_cpu_utils_helper.h",
+])
+
+filegroup(
+    name = "legacy_lib_internal_srcs",
+    srcs = [
+        "android_armv7a_cpu_utils_helper.cc",
+        "clock_cycle_profiler.cc",
+    ],
+    visibility = ["//tensorflow/core/platform:__pkg__"],
+)
+
+cc_library(
+    name = "profile_utils_cpu_utils",
+    srcs = [
+        "android_armv7a_cpu_utils_helper.h",
+        "cpu_utils.cc",
+        "i_cpu_utils_helper.h",
+    ],
+    hdrs = [
+        "cpu_utils.h",
+    ],
+    copts = tf_copts(),
+    deps = [
+        "//tensorflow/core/platform:logging",
+        "//tensorflow/core/platform:macros",
+        "//tensorflow/core/platform:types",
+        "@com_google_absl//absl/base",
+    ],
+    alwayslink = 1,
+)

From 0495c4af6e1e6202c0f6aa75661afebe0823c5b8 Mon Sep 17 00:00:00 2001
From: Hanhan Wang <hanchung@google.com>
Date: Mon, 14 Dec 2020 10:46:04 -0800
Subject: [PATCH 47/60] [NFC] Make function names follow style guide.

Functions should start with a capital letter and have a capital letter for each
new word. See https://google.github.io/styleguide/cppguide.html#Function_Names

PiperOrigin-RevId: 347420402
Change-Id: I7bdec384aba39675d5315972396d73920c87d48a
---
 .../mhlo/transforms/legalize_to_linalg.cc     | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_to_linalg.cc b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_to_linalg.cc
index 9beda1388bd..56a1ea1430b 100644
--- a/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_to_linalg.cc
+++ b/tensorflow/compiler/mlir/hlo/lib/Dialect/mhlo/transforms/legalize_to_linalg.cc
@@ -49,17 +49,17 @@ SmallVector<StringRef, 3> GetNParallelLoopsAttrs(unsigned nParallelLoops) {
 }
 
 template <bool isLHLO = true>
-Value getResultValue(Operation* op) {
+Value GetResultValue(Operation* op) {
   return isLHLO ? op->getOperand(op->getNumOperands() - 1) : op->getResult(0);
 }
 
 template <bool isLHLO = true>
-ShapedType getHloOpResultType(Operation* op) {
-  return getResultValue<isLHLO>(op).getType().template cast<ShapedType>();
+ShapedType GetHloOpResultType(Operation* op) {
+  return GetResultValue<isLHLO>(op).getType().template cast<ShapedType>();
 }
 
 template <bool isLHLO = true>
-bool verifyHloOpBufferOrTensorSemantics(Operation* op) {
+bool VerifyHloOpBufferOrTensorSemantics(Operation* op) {
   auto verify_type = [&](Value val) -> bool {
     return (isLHLO && val.getType().isa<MemRefType>()) ||
            (!isLHLO && val.getType().isa<RankedTensorType>());
@@ -293,8 +293,8 @@ class DataMovementOpConverter : public OpConversionPattern<OpTy> {
   LogicalResult matchAndRewrite(
       OpTy op, ArrayRef<Value> args,
       ConversionPatternRewriter& rewriter) const final {
-    if (!verifyHloOpBufferOrTensorSemantics<isLHLO>(op)) return failure();
-    auto result_type = getHloOpResultType<isLHLO>(op);
+    if (!VerifyHloOpBufferOrTensorSemantics<isLHLO>(op)) return failure();
+    auto result_type = GetHloOpResultType<isLHLO>(op);
 
     SmallVector<AffineMap, 2> indexing_maps =
         Derived::getIndexingMaps(op, &rewriter);
@@ -331,7 +331,7 @@ class BroadcastConverter
     ShapedType input_type =
         broadcast_op.operand().getType().template cast<ShapedType>();
     unsigned input_rank = input_type.getRank();
-    unsigned nloops = getHloOpResultType<isLHLO>(broadcast_op).getRank();
+    unsigned nloops = GetHloOpResultType<isLHLO>(broadcast_op).getRank();
 
     // BroadcastOp prepends the dimensions in the `broadcast_sizes` attribute to
     // the input's dimensions.
@@ -365,7 +365,7 @@ class HloBroadcastInDimConverter
 
   static SmallVector<AffineMap, 2> getIndexingMaps(
       mhlo::BroadcastInDimOp broadcast_op, Builder* b) {
-    auto result_type = getHloOpResultType<false>(broadcast_op);
+    auto result_type = GetHloOpResultType<false>(broadcast_op);
     auto operand_type =
         broadcast_op.operand().getType().template cast<ShapedType>();
     unsigned nloops = result_type.getRank();
@@ -563,7 +563,7 @@ class TransposeConverter
                                 isLHLO>::DataMovementOpConverter;
   static SmallVector<AffineMap, 2> getIndexingMaps(OpTy op, Builder* b) {
     auto result_type =
-        getHloOpResultType<isLHLO>(op).template cast<ShapedType>();
+        GetHloOpResultType<isLHLO>(op).template cast<ShapedType>();
     auto nloops = result_type.getRank();
     SmallVector<AffineExpr, 2> input_exprs;
     input_exprs.resize(result_type.getRank());
@@ -587,11 +587,11 @@ class ReshapeOpConverter : public OpConversionPattern<OpTy> {
   LogicalResult matchAndRewrite(
       OpTy reshape_op, ArrayRef<Value> args,
       ConversionPatternRewriter& rewriter) const final {
-    if (!verifyHloOpBufferOrTensorSemantics<isLHLO>(reshape_op))
+    if (!VerifyHloOpBufferOrTensorSemantics<isLHLO>(reshape_op))
       return failure();
     ShapedType operand_type =
         reshape_op.operand().getType().template cast<ShapedType>();
-    ShapedType result_type = getHloOpResultType<isLHLO>(reshape_op);
+    ShapedType result_type = GetHloOpResultType<isLHLO>(reshape_op);
 
     if (!operand_type.hasStaticShape() || !result_type.hasStaticShape())
       return failure();
@@ -696,7 +696,7 @@ class IotaConverter : public OpConversionPattern<OpTy> {
   LogicalResult matchAndRewrite(
       OpTy iota_op, ArrayRef<Value> args,
       ConversionPatternRewriter& rewriter) const final {
-    ShapedType result_shaped_type = getHloOpResultType<isLHLO>(iota_op);
+    ShapedType result_shaped_type = GetHloOpResultType<isLHLO>(iota_op);
     if (!result_shaped_type) return failure();
 
     auto result_element_type = result_shaped_type.getElementType();
@@ -867,7 +867,7 @@ class ReverseConverter
                                 isLHLO>::DataMovementOpConverter;
   static SmallVector<AffineMap, 2> getIndexingMaps(OpTy op, Builder* b) {
     auto result_type =
-        getHloOpResultType<isLHLO>(op).template cast<ShapedType>();
+        GetHloOpResultType<isLHLO>(op).template cast<ShapedType>();
     auto nloops = result_type.getRank();
     SmallVector<AffineExpr, 2> input_exprs;
     input_exprs.reserve(nloops);

From 3c81a306067d8148a6e37ccb51b9836766932796 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 14 Dec 2020 11:06:49 -0800
Subject: [PATCH 48/60] tape.batch_jacobian: don't make zeros with the wrong
 dtype if gradients are disconnected

Fixes #43043.

PiperOrigin-RevId: 347425143
Change-Id: I6d965b49c64319d48b3baffd0821450b1155de62
---
 tensorflow/python/eager/backprop.py      | 10 +++-------
 tensorflow/python/eager/backprop_test.py | 22 ----------------------
 2 files changed, 3 insertions(+), 29 deletions(-)

diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py
index 527b106d01e..be121bf135e 100644
--- a/tensorflow/python/eager/backprop.py
+++ b/tensorflow/python/eager/backprop.py
@@ -1344,13 +1344,9 @@ class GradientTape(object):
                                  parallel_iterations=parallel_iterations)
     new_shape = array_ops.concat([target_shape, source_shape[1:]], axis=0)
     if output is None:
-      if not experimental_use_pfor and target_row_size == 0:
-        # Since we can't actually run the loop function in this case, we don't
-        # know whether gradients are unconnected or not. We'll return a numeric
-        # tensor (with zero elements).
-        output = array_ops.zeros(new_shape, target.dtype)
-        if rewrap_as_ndarray:
-          output = np_arrays.tensor_to_ndarray(output)
+      output = array_ops.zeros(new_shape)
+      if rewrap_as_ndarray:
+        output = np_arrays.tensor_to_ndarray(output)
       return output
     else:
       output = array_ops.reshape(output,
diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py
index 417f8c132e7..0063b7f155e 100644
--- a/tensorflow/python/eager/backprop_test.py
+++ b/tensorflow/python/eager/backprop_test.py
@@ -1961,28 +1961,6 @@ class BatchJacobianTest(test.TestCase, parameterized.TestCase):
       f = def_function.function(f)
     self.assertAllEqual([1, 0, 0], array_ops.shape(f(array_ops.zeros([1, 0]))))
 
-  @parameterized.parameters((True,), (False))
-  def test_respects_disconnected_gradients(self, use_pfor):
-    @def_function.function
-    def f(x):
-      del x
-      return constant_op.constant([[1.]], dtype=dtypes.float64)
-
-    with backprop.GradientTape(persistent=True) as tape:
-      x = constant_op.constant([[2.]], dtype=dtypes.float64)
-      tape.watch(x)
-      y = f(x)
-    self.assertIsNone(tape.batch_jacobian(y, x, experimental_use_pfor=use_pfor))
-
-    with backprop.GradientTape(persistent=True) as tape:
-      x = constant_op.constant([[2.]], dtype=dtypes.float64)
-      tape.watch(x)
-      y = f(x)
-    jac = tape.batch_jacobian(y, x, unconnected_gradients='zero',
-                              experimental_use_pfor=use_pfor)
-    self.assertEqual(dtypes.float64, jac.dtype)
-    self.assertAllClose([[[0.]]], jac)
-
 
 class AggregateIndexedSlicesGradientsTest(test_util.TensorFlowTestCase):
 

From e290ea66bab72e741e4d581ec58ba6b54be1edca Mon Sep 17 00:00:00 2001
From: Andy Ly <lyandy@google.com>
Date: Mon, 14 Dec 2020 11:27:49 -0800
Subject: [PATCH 49/60] Migrate TF MLIR shape inference pass to use declarative
 pass registration instead of manually defined pass registration (NFC).

PiperOrigin-RevId: 347430171
Change-Id: Iff3e9c3a6c2ddcca1ef7351adadc2c9ba75e0d4a
---
 tensorflow/compiler/mlir/BUILD                                 | 1 +
 tensorflow/compiler/mlir/tensorflow/BUILD                      | 1 +
 tensorflow/compiler/mlir/tensorflow/transforms/passes.h        | 3 +++
 .../mlir/tensorflow/transforms/shape_inference_pass.cc         | 3 ---
 tensorflow/compiler/mlir/tensorflow/transforms/tf_passes.td    | 2 +-
 tensorflow/compiler/mlir/tf_mlir_opt_main.cc                   | 2 ++
 6 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/tensorflow/compiler/mlir/BUILD b/tensorflow/compiler/mlir/BUILD
index 15a10c31237..e0074545d33 100644
--- a/tensorflow/compiler/mlir/BUILD
+++ b/tensorflow/compiler/mlir/BUILD
@@ -79,6 +79,7 @@ cc_library(
         "//tensorflow/compiler/mlir/hlo:hlo_dialect_registration",
         "//tensorflow/compiler/mlir/lite:tensorflow_lite",
         "//tensorflow/compiler/mlir/tensorflow",
+        "//tensorflow/compiler/mlir/tensorflow:tensorflow_passes",
         "//tensorflow/compiler/mlir/tools/kernel_gen/ir:tf_framework_ops",
         "//tensorflow/core:lib",
         "@llvm-project//mlir:AllPassesAndDialectsNoRegistration",
diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD
index 82b00f303a3..301e3ba9151 100644
--- a/tensorflow/compiler/mlir/tensorflow/BUILD
+++ b/tensorflow/compiler/mlir/tensorflow/BUILD
@@ -639,6 +639,7 @@ cc_library(
         ":tensorflow_tfrt_ops_inc_gen",
         ":tensorflow_traits",
         ":tensorflow_types",
+        ":tf_pass_inc_gen",
         ":tf_saved_model_inc_gen",
         "//tensorflow/compiler/mlir/lite:validators",
         "//tensorflow/core:framework",
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h
index 748735ada9c..609fab0e30b 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h
@@ -401,6 +401,9 @@ CreateTPUCompileOpReplicationPass();
 
 }  // namespace TFTPU
 
+#define GEN_PASS_REGISTRATION
+#include "tensorflow/compiler/mlir/tensorflow/transforms/tf_passes.h.inc"
+
 }  // namespace mlir
 
 #endif  // TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_PASSES_H_
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference_pass.cc b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference_pass.cc
index 8587c1bc7ed..9d77164a906 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference_pass.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/shape_inference_pass.cc
@@ -36,9 +36,6 @@ class ShapeInference : public TensorFlowShapeInferencePassBase<ShapeInference> {
   }
 };
 
-PassRegistration<ShapeInference> pass(
-    "tf-shape-inference", "Simple Shape Inference on TensorFlow Dialect");
-
 }  // namespace
 
 std::unique_ptr<OperationPass<ModuleOp>> CreateTFShapeInferencePass() {
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tf_passes.td b/tensorflow/compiler/mlir/tensorflow/transforms/tf_passes.td
index 4a8076db651..6561e41adeb 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/tf_passes.td
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/tf_passes.td
@@ -21,7 +21,7 @@ def TensorFlowShapeInferencePass : Pass<"tf-shape-inference", "ModuleOp"> {
   let summary = "Simple Shape Inference on TensorFlow Dialect";
   // TODO(jpienaar): Write `description`.
 
-  let constructor = "CreateTFShapeInferencePass()";
+  let constructor = "TF::CreateTFShapeInferencePass()";
 
   let options = [
     Option<"max_iterations_", "max-iterations", "int64_t", /*default=*/"10",
diff --git a/tensorflow/compiler/mlir/tf_mlir_opt_main.cc b/tensorflow/compiler/mlir/tf_mlir_opt_main.cc
index ee0241945f6..70c00efe405 100644
--- a/tensorflow/compiler/mlir/tf_mlir_opt_main.cc
+++ b/tensorflow/compiler/mlir/tf_mlir_opt_main.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/compiler/mlir/init_mlir.h"
 #include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
 #include "tensorflow/compiler/mlir/tensorflow/dialect_registration.h"
+#include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h"
 #include "tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.h"
 #include "tensorflow/core/platform/init_main.h"
 
@@ -29,6 +30,7 @@ int main(int argc, char **argv) {
   tensorflow::InitMlir y(&argc, &argv);
 
   mlir::registerAllPasses();
+  mlir::registerTensorFlowPasses();
   mlir::mhlo::registerAllMhloPasses();
   mlir::lmhlo::registerAllLmhloPasses();
 

From 49d76d9d4be5e63b742799ea27841523170638df Mon Sep 17 00:00:00 2001
From: Prakalp Srivastava <prakalps@google.com>
Date: Mon, 14 Dec 2020 11:45:32 -0800
Subject: [PATCH 50/60] Move GuaranteeAllFuncOneUse pass prior to first TF
 shape inference pass.

This allows the first shape inference pass to propagate shapes to/from callees, because every function is already guaranteed to have a single use when that pass runs.

PiperOrigin-RevId: 347434378
Change-Id: Ia367de1559c7baef275d2ce12dda9601d5972be3
---
 .../compiler/mlir/tensorflow/utils/compile_mlir_util.cc       | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc
index 862ecb4a556..2bc21a21cf8 100644
--- a/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc
+++ b/tensorflow/compiler/mlir/tensorflow/utils/compile_mlir_util.cc
@@ -282,6 +282,8 @@ void CreateConvertMlirToXlaHloPipeline(
   pm.addPass(mlir::TF::CreateTFFunctionalControlFlowToRegions());
   pm.addNestedPass<mlir::FuncOp>(mlir::TF::CreateDropWhileShapeInvariantPass());
   pm.addNestedPass<mlir::FuncOp>(mlir::createCanonicalizerPass());
+  // Guarantee all functions have one use, which enables shape inference.
+  pm.addPass(mlir::TF::CreateGuaranteeAllFuncsOneUsePass());
   // Run shape inference pass before tensorlist decomposition to get buffer
   // shape of uninitialized TensorLists.
   pm.addPass(mlir::TF::CreateTFShapeInferencePass());
@@ -292,8 +294,6 @@ void CreateConvertMlirToXlaHloPipeline(
       mlir::TFDevice::CreateDecomposeResourceOpsPass());
   pm.addPass(mlir::TF::CreatePromoteResourcesToArgsPass());
   pm.addPass(mlir::createSymbolDCEPass());
-  // Guarantee all functions have one use, which enables shape inference.
-  pm.addPass(mlir::TF::CreateGuaranteeAllFuncsOneUsePass());
   pm.addPass(mlir::TF::CreateTFShapeInferencePass());
   // TODO(b/171426148): We cannot completely remove region to functional control
   // flow conversion from this pipeline yet as it causes some unit tests to

From 37b8e9be12ee40b4a890e6ac6b2e9a4fc37960d9 Mon Sep 17 00:00:00 2001
From: Robert David <lrdx@google.com>
Date: Mon, 14 Dec 2020 11:50:13 -0800
Subject: [PATCH 51/60] Store temporaries in unique_ptr<T[]> instead of
 vector<T> while reading/writing tensors, so memory can be allocated with
 default-initialization (vector resize guarantees value-init with the default
 allocator).

PiperOrigin-RevId: 347435406
Change-Id: Ic7aa3dc71239481f34ef7fb3b7d4567cadd44b02
---
 tensorflow/lite/delegates/gpu/cl/tensor.cc    | 33 +++++++++---------
 .../gpu/metal/metal_spatial_tensor.mm         | 34 ++++++++++---------
 2 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/tensorflow/lite/delegates/gpu/cl/tensor.cc b/tensorflow/lite/delegates/gpu/cl/tensor.cc
index 5243124e77a..a30007be77e 100644
--- a/tensorflow/lite/delegates/gpu/cl/tensor.cc
+++ b/tensorflow/lite/delegates/gpu/cl/tensor.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/lite/delegates/gpu/cl/tensor.h"
 
 #include <cstring>
+#include <memory>
 
 #include "absl/strings/str_cat.h"
 #include "tensorflow/lite/delegates/gpu/cl/buffer.h"
@@ -477,18 +478,18 @@ absl::Status Tensor::WriteDataBHWDC(absl::Span<const float> in,
       shape_.b * shape_.w * shape_.h * shape_.d * aligned_channels;
 
   const size_t data_size = elements_count * SizeOf(descriptor_.data_type);
-  std::vector<float> data_f;
-  std::vector<half> data_h;
+  std::unique_ptr<float[]> data_f;
+  std::unique_ptr<half[]> data_h;
   if (descriptor_.data_type == DataType::FLOAT32) {
-    data_f.resize(elements_count);
-    data_ptr = data_f.data();
+    data_f.reset(new float[elements_count]);
+    data_ptr = data_f.get();
     DataFromBHWDC(in, shape_, descriptor_,
-                  absl::MakeSpan(data_f.data(), data_f.size()));
+                  absl::MakeSpan(data_f.get(), elements_count));
   } else {
-    data_h.resize(elements_count);
-    data_ptr = data_h.data();
+    data_h.reset(new half[elements_count]);
+    data_ptr = data_h.get();
     DataFromBHWDC(in, shape_, descriptor_,
-                  absl::MakeSpan(data_h.data(), data_h.size()));
+                  absl::MakeSpan(data_h.get(), elements_count));
   }
 
   switch (descriptor_.storage_type) {
@@ -541,14 +542,14 @@ absl::Status Tensor::ReadDataBHWDC(absl::Span<float> out,
   const int elements_count =
       shape_.b * shape_.w * shape_.h * shape_.d * aligned_channels;
   const size_t data_size = elements_count * SizeOf(descriptor_.data_type);
-  std::vector<float> data_f;
-  std::vector<half> data_h;
+  std::unique_ptr<float[]> data_f;
+  std::unique_ptr<half[]> data_h;
   if (descriptor_.data_type == DataType::FLOAT32) {
-    data_f.resize(elements_count);
-    data_ptr = data_f.data();
+    data_f.reset(new float[elements_count]);
+    data_ptr = data_f.get();
   } else {
-    data_h.resize(elements_count);
-    data_ptr = data_h.data();
+    data_h.reset(new half[elements_count]);
+    data_ptr = data_h.get();
   }
 
   switch (descriptor_.storage_type) {
@@ -568,10 +569,10 @@ absl::Status Tensor::ReadDataBHWDC(absl::Span<float> out,
   }
 
   if (descriptor_.data_type == DataType::FLOAT32) {
-    DataToBHWDC(absl::MakeConstSpan(data_f.data(), data_f.size()), shape_,
+    DataToBHWDC(absl::MakeConstSpan(data_f.get(), elements_count), shape_,
                 descriptor_, out);
   } else {
-    DataToBHWDC(absl::MakeConstSpan(data_h.data(), data_h.size()), shape_,
+    DataToBHWDC(absl::MakeConstSpan(data_h.get(), elements_count), shape_,
                 descriptor_, out);
   }
 
diff --git a/tensorflow/lite/delegates/gpu/metal/metal_spatial_tensor.mm b/tensorflow/lite/delegates/gpu/metal/metal_spatial_tensor.mm
index 376eef25359..9ae19ea63da 100644
--- a/tensorflow/lite/delegates/gpu/metal/metal_spatial_tensor.mm
+++ b/tensorflow/lite/delegates/gpu/metal/metal_spatial_tensor.mm
@@ -17,6 +17,8 @@ limitations under the License.
 
 #include "tensorflow/lite/delegates/gpu/common/task/buffer_desc.h"
 
+#include <memory>
+
 namespace tflite {
 namespace gpu {
 namespace metal {
@@ -241,18 +243,18 @@ absl::Status MetalSpatialTensor::WriteDataBHWDC(absl::Span<const float> in) {
       shape_.b * shape_.w * shape_.h * shape_.d * aligned_channels;
 
   const size_t data_size = elements_count * SizeOf(descriptor_.data_type);
-  std::vector<float> data_f;
-  std::vector<half> data_h;
+  std::unique_ptr<float[]> data_f;
+  std::unique_ptr<half[]> data_h;
   if (descriptor_.data_type == DataType::FLOAT32) {
-    data_f.resize(elements_count);
-    data_ptr = data_f.data();
+    data_f.reset(new float[elements_count]);
+    data_ptr = data_f.get();
     DataFromBHWDC(in, shape_, descriptor_,
-                  absl::MakeSpan(data_f.data(), data_f.size()));
+                  absl::MakeSpan(data_f.get(), elements_count));
   } else {
-    data_h.resize(elements_count);
-    data_ptr = data_h.data();
+    data_h.reset(new half[elements_count]);
+    data_ptr = data_h.get();
     DataFromBHWDC(in, shape_, descriptor_,
-                  absl::MakeSpan(data_h.data(), data_h.size()));
+                  absl::MakeSpan(data_h.get(), elements_count));
   }
 
   switch (descriptor_.storage_type) {
@@ -297,14 +299,14 @@ absl::Status MetalSpatialTensor::ReadDataBHWDC(absl::Span<float> out) const {
   const int elements_count =
       shape_.b * shape_.w * shape_.h * shape_.d * aligned_channels;
   const size_t data_size = elements_count * SizeOf(descriptor_.data_type);
-  std::vector<float> data_f;
-  std::vector<half> data_h;
+  std::unique_ptr<float[]> data_f;
+  std::unique_ptr<half[]> data_h;
   if (descriptor_.data_type == DataType::FLOAT32) {
-    data_f.resize(elements_count);
-    data_ptr = data_f.data();
+    data_f.reset(new float[elements_count]);
+    data_ptr = data_f.get();
   } else {
-    data_h.resize(elements_count);
-    data_ptr = data_h.data();
+    data_h.reset(new half[elements_count]);
+    data_ptr = data_h.get();
   }
 
   switch (descriptor_.storage_type) {
@@ -321,10 +323,10 @@ absl::Status MetalSpatialTensor::ReadDataBHWDC(absl::Span<float> out) const {
   }
 
   if (descriptor_.data_type == DataType::FLOAT32) {
-    DataToBHWDC(absl::MakeConstSpan(data_f.data(), data_f.size()), shape_,
+    DataToBHWDC(absl::MakeConstSpan(data_f.get(), elements_count), shape_,
                 descriptor_, out);
   } else {
-    DataToBHWDC(absl::MakeConstSpan(data_h.data(), data_h.size()), shape_,
+    DataToBHWDC(absl::MakeConstSpan(data_h.get(), elements_count), shape_,
                 descriptor_, out);
   }
 

From ebd14c2c12231b058aa27fa8afd29e5e610b236f Mon Sep 17 00:00:00 2001
From: Jacques Pienaar <jpienaar@google.com>
Date: Mon, 14 Dec 2020 11:54:22 -0800
Subject: [PATCH 52/60] Split out dialect hooks into separate targets

This makes it possible to avoid linking in the hooks when, for example, one wants the TF dialect but not constant folding via the fallback hook.

PiperOrigin-RevId: 347436371
Change-Id: Ib4790e8282073b7c0c6488be5faa1a245c81c372
---
 tensorflow/compiler/mlir/BUILD                |  1 +
 tensorflow/compiler/mlir/lite/BUILD           |  3 +-
 tensorflow/compiler/mlir/tensorflow/BUILD     | 43 ++++++-------------
 .../tensorflow/transforms/constant_fold.cc    |  1 +
 .../tensorflow/transforms/constant_fold.h     |  6 +--
 .../transforms/decode_attributes_hook.cc      |  1 +
 tensorflow/compiler/mlir/tfjs/BUILD           |  3 +-
 7 files changed, 22 insertions(+), 36 deletions(-)

diff --git a/tensorflow/compiler/mlir/BUILD b/tensorflow/compiler/mlir/BUILD
index e0074545d33..77db4eb43be 100644
--- a/tensorflow/compiler/mlir/BUILD
+++ b/tensorflow/compiler/mlir/BUILD
@@ -110,6 +110,7 @@ cc_library(
         "//tensorflow/compiler/mlir/tensorflow:compile_mlir_util_pass",
         "//tensorflow/compiler/mlir/tensorflow:tensorflow_passes",
         "//tensorflow/compiler/mlir/tensorflow:tensorflow_test_passes",
+        "//tensorflow/compiler/mlir/tensorflow:tf_dialect_passes",
         "//tensorflow/compiler/mlir/tensorflow:tf_legalize_hlo",
         "//tensorflow/compiler/mlir/tfjs:tensorflow_js_passes",
         "//tensorflow/compiler/mlir/tosa:tf_passes",
diff --git a/tensorflow/compiler/mlir/lite/BUILD b/tensorflow/compiler/mlir/lite/BUILD
index 72e5799c5c9..664dfe0e3ba 100644
--- a/tensorflow/compiler/mlir/lite/BUILD
+++ b/tensorflow/compiler/mlir/lite/BUILD
@@ -937,7 +937,8 @@ cc_library(
         "//tensorflow/compiler/mlir/tensorflow:decode_constant_pass",
         "//tensorflow/compiler/mlir/tensorflow:error_util",
         "//tensorflow/compiler/mlir/tensorflow:mlir_roundtrip_flags",
-        "//tensorflow/compiler/mlir/tensorflow:tf_dialect_hooks",
+        "//tensorflow/compiler/mlir/tensorflow:tf_dialect_lib",
+        "//tensorflow/compiler/mlir/tensorflow:tf_dialect_passes",
         "//tensorflow/compiler/mlir/tensorflow:translate_lib",
         "//tensorflow/core:framework",
         "//tensorflow/core:protos_all_cc",
diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD
index 301e3ba9151..c74d47404e4 100644
--- a/tensorflow/compiler/mlir/tensorflow/BUILD
+++ b/tensorflow/compiler/mlir/tensorflow/BUILD
@@ -1107,6 +1107,7 @@ cc_library(
         ":mlir_roundtrip_flags",
         ":tensorflow",
         ":tensorflow_attributes",
+        ":tensorflow_passes",
         ":tensorflow_types",
         ":tf_saved_model_passes",
         ":translate_utils",
@@ -1450,21 +1451,27 @@ cc_library(
 )
 
 cc_library(
-    name = "tf_constant_fallback_hook",
+    name = "tf_dialect_passes",
     srcs = [
         "transforms/constant_fold.cc",
+        "transforms/decode_attributes_hook.cc",
     ],
     hdrs = [
         "transforms/constant_fold.h",
     ],
     deps = [
+        ":convert_tensor",
         ":decode_constant_pass",
         ":eval_util",
         ":tensorflow",
         ":tensorflow_traits",
         ":tensorflow_types",
         "//tensorflow/c:tf_status",
+        "//tensorflow/c/eager:c_api",
+        "//tensorflow/core:framework",
         "//tensorflow/core:lib",
+        "//tensorflow/stream_executor",
+        "//tensorflow/stream_executor/lib",
         "@llvm-project//llvm:Support",
         "@llvm-project//mlir:IR",
         "@llvm-project//mlir:SideEffects",
@@ -1474,39 +1481,13 @@ cc_library(
 )
 
 cc_library(
-    name = "tf_decode_attributes_hook",
-    srcs = [
-        "transforms/decode_attributes_hook.cc",
-    ],
+    name = "tf_dialect_lib",
     deps = [
-        ":convert_tensor",
-        ":decode_constant_pass",
-        ":tensorflow",
-        "//tensorflow/core:framework",
-        "//tensorflow/stream_executor",
-        "//tensorflow/stream_executor/lib",
-        "@llvm-project//llvm:Support",
-        "@llvm-project//mlir:IR",
-        "@llvm-project//mlir:Support",
-    ],
-    alwayslink = 1,
-)
-
-cc_library(
-    name = "tf_dialect_hooks",
-    deps = [
-        ":tf_constant_fallback_hook",
-        ":tf_decode_attributes_hook",
+        ":tf_dialect_passes",
         "@llvm-project//mlir:AllPassesAndDialectsNoRegistration",
     ],
 )
 
-# TODO(jpienaar): Remove post updating all.
-alias(
-    name = "tf_dialect_lib",
-    actual = ":tf_dialect_hooks",
-)
-
 cc_library(
     name = "tf_graph_optimization_pass",
     srcs = ["transforms/tf_graph_optimization_pass.cc"],
@@ -1722,8 +1703,8 @@ cc_library(
     name = "compile_mlir_util",
     hdrs = ["utils/compile_mlir_util.h"],
     deps = COMPILE_MLIR_UTIL_DEPS + [
-        ":compile_mlir_util_no_tf_dialect_passes",
-        ":tf_dialect_hooks",
+        "compile_mlir_util_no_tf_dialect_passes",
+        ":tf_dialect_passes",
     ],
 )
 
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.cc b/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.cc
index a3c487f6378..31cfc5ebf9c 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include "mlir/IR/OpDefinition.h"  // from @llvm-project
 #include "mlir/Interfaces/SideEffectInterfaces.h"  // from @llvm-project
 #include "mlir/Support/LogicalResult.h"  // from @llvm-project
+#include "tensorflow/c/eager/c_api.h"
 #include "tensorflow/c/tf_status.h"
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h"
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.h b/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.h
index 54f296dcb2f..887eea745e7 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.h
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.h
@@ -25,9 +25,9 @@ limitations under the License.
 namespace mlir {
 namespace TF {
 
-LogicalResult ConstantFoldFallbackHook(Operation *inst,
-                                       ArrayRef<Attribute> operands,
-                                       SmallVectorImpl<OpFoldResult> &results);
+LogicalResult ConstantFoldFallbackHook(
+    Operation *inst, ArrayRef<Attribute> operands,
+    SmallVectorImpl<OpFoldResult> &results);  // NOLINT
 
 }  // namespace TF
 }  // namespace mlir
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/decode_attributes_hook.cc b/tensorflow/compiler/mlir/tensorflow/transforms/decode_attributes_hook.cc
index 9dbf332fc67..09fac6e0706 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/decode_attributes_hook.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/decode_attributes_hook.cc
@@ -23,6 +23,7 @@ limitations under the License.
 #include "mlir/IR/Types.h"  // from @llvm-project
 #include "mlir/Support/LogicalResult.h"  // from @llvm-project
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
+#include "tensorflow/compiler/mlir/tensorflow/transforms/constant_fold.h"
 #include "tensorflow/compiler/mlir/tensorflow/utils/convert_tensor.h"
 #include "tensorflow/core/framework/logging.h"
 #include "tensorflow/stream_executor/lib/statusor.h"
diff --git a/tensorflow/compiler/mlir/tfjs/BUILD b/tensorflow/compiler/mlir/tfjs/BUILD
index 66b9a5493ce..a337dc02a9e 100644
--- a/tensorflow/compiler/mlir/tfjs/BUILD
+++ b/tensorflow/compiler/mlir/tfjs/BUILD
@@ -175,7 +175,8 @@ cc_library(
         "//tensorflow/compiler/mlir/tensorflow",
         "//tensorflow/compiler/mlir/tensorflow:decode_constant_pass",
         "//tensorflow/compiler/mlir/tensorflow:error_util",
-        "//tensorflow/compiler/mlir/tensorflow:tf_dialect_hooks",
+        "//tensorflow/compiler/mlir/tensorflow:tf_dialect_lib",
+        "//tensorflow/compiler/mlir/tensorflow:tf_dialect_passes",
         "//tensorflow/compiler/mlir/tensorflow:translate_cl_options",
         "//tensorflow/compiler/mlir/tensorflow:translate_lib",
         "//tensorflow/core:framework",

From aaf5008be0acf63ca60751e3556c5433253afe4b Mon Sep 17 00:00:00 2001
From: George Karpenkov <cheshire@google.com>
Date: Mon, 14 Dec 2020 11:58:39 -0800
Subject: [PATCH 53/60] [XLA:GPU/Docs] Showcase MLPerf for performance
 improvements

PiperOrigin-RevId: 347437325
Change-Id: Ida8af3c0ac70f22ceef8249d5d1041ff321b36f8
---
 .../xla/g3doc/images/tf_xla_performance.png   | Bin 20690 -> 22954 bytes
 tensorflow/compiler/xla/g3doc/index.md        |   7 ++++---
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/tensorflow/compiler/xla/g3doc/images/tf_xla_performance.png b/tensorflow/compiler/xla/g3doc/images/tf_xla_performance.png
index 70087f5747c11750dd6d8c33160954d26a4f2a16..7ab49fdfbf6d2cb9833b900b6b21e11cc41f779e 100644
GIT binary patch
literal 22954
zcmeAS@N?(olHy`uVBq!ia0y~yU{zpXV0y;E#K6FCH*swu0|Ntdv6E*A2M5RPhyD*3
z7#QNKLLy3n63Z0|it^Jkb5a#bDhpB-90LLj85lTbKKAlI>#cjrCrm@*l>g~7o=<}q
zn3IL|guR8m7rQVsGjlU@7?>Csn6XT9bJ%7XqH&Rdb;gX@OBomLjNCL+v*SD~ga0nk
z3~AjPJq!#A44y8IAr*7p+|8U3dbLsSLFmyC=4Kxw4korEona2rlY?9qqzdGVh&t}b
zP>*}Tl9xQilgVp^$t{g-E<H?!6FFjT_Z?$A5@hJd;WAU{FsosckRxMjQ&Uru)%W8Y
zIk#(UP&j^T_Nm|hK3Jx&Eq!wN`Az<>S8t`i-QuDo$iTqRu-~`p3L^sp!vdv%4U7y7
z3albI3=9k&jI7cO3=BaI2iX`H8bn5<l>a5|^?NRDPCvh^^mW+ku(xYApOdmGd7(bP
zrs&HiS?jVM3ByNsiqD@tbt>!1ii-`5%-7b%R$n^Jz`)S478JUrZNcZKXa?u)|NBj-
z*);3Q4D<YZPp8M%Riu1=cGl9;Qnjnx&F$Ep%FkBi?`jr-^%+~=P=E9H`~Cfo`>eG>
zS528cdw1sLWqYf?@B4nQdU|Zx&3AWq=kI#C?C<aI@^O`)&zjeNI;oy}YfI#ojKIXi
zhwSn-7j_muKXmBOzS`gK?(O~UGM$luK_I<sga3{Fe@^fBnQiv=ob~&f2aW7}{O@jU
zeSOCG{F(FT=UbP*yBZ!Z>s0aithsgRt0^;Q?yUR!OU-wd%5+gNvAB|puCK4Hoo!S3
zNw)k>;I~u;28JVZWZwvXyOG?lD%7cbyOd*L*xIP)bBg^q9(L>R^XRkr`{lA?%j<Q!
z-xcKYDW;^P@W@)3w0zio|L?n&X&@&RbCqSixv^2XQYZ5Ew%p0e-qZDX6=i--eY51W
z-fkVs>JJYNZqjFFU{H7_@@D_lK3VI3ulN6Z-KD+m#@_1hy4&v*U33*!J#L<NN5U!N
z=BCunuV-hQ^DD|&6eOIUrkfhi#=zikM)OAf<hsAVuC5MW??2yeb@+NYvz#02>%Oi&
z92{T!H5BY-RlA~!l;`K>LbZhUSshrpcEN%NkGl2m?Wz2HZEf^&zqzyK&;S2oaevtA
zFx{33d$wBt{N<6EnHjn&L{ke4es9|3aW9vTfx#i@_qVsFKlg8L{%c<OuvPqC_4~cL
z(cAWXx#YckZrQE;y3e!!|K0z;yhzK~*tn(O!GT60c~2FgljmZ#W?hB)_f5ibw|+U>
zx}T@x@0DEk-T&|F`u~6L|KD5o_Lgb(H60C&6X(zCZ!EgLF7~AO*6iziic<<BBd2_R
zpT@wzU@=Md&6JlH7rS#jys*&u_4W1q|7-JZ9+$6=Ns265616ppSK3VHw-EyaLz3S?
zyTo2;b3bqI>W@doy?fr@-EIC;sWP~HciCGf4sLNhosDnimfz!4+>(2HThLN3XaH&6
zP+#LcT~D#4U9M`yjvYTvd^|Z>y<H&d`nuYKtl}*KRo~yqI#v97x!gSeUQLB>V&X&P
zej6dD7nhg&7cJ=lr*TWx-z~i|mYa&6o?5^EU)BD)zhUd+_LjfD7rHv^Zq@6x-|yGg
zb0pr`Q5d=^gi|r5?&s6{f8UjJb8+n|d;9BJbpG4>`|H2Hy1KXOYt;6<*uTHNvhzw!
z`Llg$p(FzXgW>dP9x98@6}<ZSHoyM&VflXsKU>x+{{<zUDVo82`(yXl{S^(5*;xAe
zn)3COw6uM{UahvYv{c{4!@$sBvD&2W&yN+xbE@?Ad^nW7ZfDx_b8~NR&%eLFzJ71j
z*Ii|Atx8{AQFLyT*|{d~?ykSTzu(`UfB*UX`hEHL_bv0C9asDH>i_5e|IFW(dwbpy
zP%u9_!)M08z_6s@eF8HBg9Dew4F(2=2~1oN!6se?1_sRq!$)fBVm<~22EiNKtUun~
z&cEib_s8S6yB422bbszN-b?y9VVMt7Lsz9Qah=;5;^nO(6qwGy&``9sG(73U<KN#z
zetFK&FkLg%>}~r@t1{O|VucJ0|K9!iboK8o(}}ltX@9(3-nGcjQA_g%1H+%Ykv}_c
zAL?wKI-Ac`wlO4V10%za-O0aa&A9Pl=l!oC8pAb&NBu?jd=%1Wb-++mT>SlwjgK!Z
zbl#eE_0*X&JyxKUF5|z4>)Z1Abynr??u5tJmR?-sdV5>0b;*l>qCJI=kG;LUegCUf
ztABlYS^Vqea(8JV1_p+{Y2X4%z=^|qy58BdXXjd%zk3)IyXw?b?eDLzzh7?uH}mzi
zwWoEZ85l%;rgyRix;L!Xm6$AB$`YjZt7&iP>uc8U_Y|L-W2xA3tXI1H^|iN?y!A3K
zFY|qTtT+4Gn#h=#J-N5H<=)<=8@=t#+wJ%3e!tyb{^rKSdGqQ%pEbAt@u2zZ>+Ahy
zHVh2Ql~`y02-u)Kse}8Q@p&8Lj0+3?{P{CGZ&&7p1&*@TWj6nQHM8^Y`*4W+@^b(B
zuUErgUtMip`|Hd5`v1S53X3u{a5*1TvlDsKrShn0?zCxTcT2DT{k{Kx_3E&-elMCq
z)vaODkv~5_E8lNqXLvAWae}ZsSDCBON5%B@`~Ur#8Xi|EByaQo&*%86mrKLrYfYa{
zWME);pyKjJ=FQ~n>+OFp+y4z-HHm?N;lO>RN8qAT)cMHvj{9Y|bJc%MjQ{&8{Jt9~
zM1MrP)StN7S$AY*8Uw?hUYR1d8jB}ulsYTet<BB9pV+P)wx;6Q%=D})D+(|Bn#UHM
zR4sjV<>v14_g$je^J+ewl(jD7kv3c7+|C#1%F4jd@VKLb{kMmF&4+{K`+wg()+@dL
z->=ucvrH_DvVMPiJJ-5AZhzfh`F|hU-`9QL9jMB}upntevv19fgX=t!N>1Ed5xAI(
ziwjhJPs_Nj*i!ZNmF=e!%D=zAmp4xHQMn27<By~*&9R%)&feW!E-oepYFfUy`S<tt
z^pup8^z`>PH!uIQnUNu2;*Eo9c5f2GRSa((To<)9i{s(v^Y;3sWsi^bDz?lr&7PL7
z!oa|wu~<U%&9}F=+Xde3{eExpa-mL_PtQJ_*4w>iPckC|gM;Le6$$qXkIO2y9G9zJ
zlYf8T`#qoiK81bw^XJd?xazgh+w;!b{m!|%Dzuhent`FAqU&P$24{}RYQ9poRbL)@
zWM<a)PM<l`viR8<>HIy0yGj@sLIn0T`_}y6D{~cEEc-^_(20r4({&<?9`~BhE4!7c
z*pjk)UG(;M{PjN$^V|J!*v-bkps?-H*5=xX!orW=-rg>McPDbW-`riLua_-d>N=N$
zp`q(ApV`Mm_T5S=Wl9?^EnU8Re%-H^%jefsU2z2UOMD)MZs6wTPCq+qYr(@qbFIte
zY$`Tfx%+iz@$+f*2B22YJ_*q`vesoKZ*CNxon;!x&d9*fAS|}HY3}UVyVK6j`ts$=
z>-GEpP4d=T8Mk-W`Z#6=hJsZcfd}i}?R@UU0cvi1Y6R8G1zepMlXuzuezRHN5P#i=
z<`vDL#K+kkcyL|tazBoRD^_T{Qea?UC^(c9vcbCam59>}`}#VtBOkOJU6If)YrQRM
zYnHNmU(BWy&nuCjs6KM!^@Po*_4ohTdOdFUmrLH!TeC{9MW&~wq)geh1{BQ<2R<%U
z73y4JT(h=^|7n+h0t16zV62*$i!9^5Z3#YQR|Gv~O1w?5|MTH+pgX9)Hd$nGlWob1
z3oDdCuJh~)JZP8m|KHzkas7WE4)cG1b2E6cTkka8=&~0V7G8Ou3@R>tj;u~l7wXgu
zUiRnq{=c`sy}jKpWB+!`<##)u&r3YqHuYomEd~aLfOC&RH`ISwEblwpOt)pi+_``G
z|9{{&&A#^M)#~+jKOQvCwJ!hnMBV=8hQz~HRt7IV$HBnBu(ac1@rL-Ck4G~vFI&2F
zsqYK}PzRvm``z;Rx}Q%~y{FZDJStv(zqXv?;Y4@2%C}puFY4n2>kB;i@2P&hq?3eU
z(vcm7k2h`F#Md9bKJIV2{qLJ|%kNcIfEq_fe0#XQ$^ZXxT;R|IMQ5+MzVi&{#V|84
zFcb<eZrWS@{hff*Vz*wexw&_C6mlfq-j@4x7RdBdYHqB%9h=z{TduASpKdi<#{Tb@
z%co6+85kI*`1Wv>JwG?snIrjlpRS&so?&QAj7{2^83Inr{pOysmtbIEr~&1vhlkt4
zR|cit+fx}HTYB~N`u+RL-`^8(a&$b{#L5k7iheq+AHFW8^2djVfyY@G7#f0(ys*5H
zduz)~v)o(%f5rdL1}PNN;ZO`&7xVMM!RC`UPfgYKpKbQ`5VwAh2`Kia=($<nP;9CH
z_q@LD``z;Y$N&G3KY6@F^>%4m+B4JZF`v&_zyI@q-M+&Ll<dH&J^%mc|NrC9=kxLw
z1qq*@o!wpjeqG$&Uo+F^MK1N4x}unof#JZ%BiZcMEgO=L_r1KlJbZ1`*EO5Z?fP(t
zd;jOT??YAwEv^6mFMt2vvMU!s33SGz&<*ozzuh!GZ&Uo?!9mqy&(F-fyfOLs{QrNR
zPh7&pz|fE{vbgE*wdnl21&4X7zrTBXb8~v<%8kj#!`8)A-q}%j^6+^E1_p(^q>v5j
zzO$xysI>9PTIJu{BNV)I=T2cY9}Y#EFBhBxU6?^e2`p~fn{jcGz@ZL7Wv{ui1_=iq
znldmj2$&mc-KhTdhEvfb=f;LBH`#clS_D3Q{)`l5lhu5kIO>00p6@j`c6(mzkN2}c
z=>wDuYOjWdw+guR%f*I-ge0xnv7_S53`511b91fTC-5*ZFzoY;QkvLdb@=+Vwb9qt
z#qQo){e4^R?Xnvi65rn1x;k?6vv+rQhp&nFctW{fCvMM<sI6J2r|DY1-}8Bq9~Z-c
zm1|EQX<DIt;JR4P$v!J4P2t5&vQ8E6c0On0li5-I{oUf_n>K9%RU`NIR!^_jVqnmc
zfL5y#ca`Lvc6_^)9lk!!Ht9&m=X2KUqqb%#wrtt5Wtwg@pQO<f|N38(%Xk?W9CD6k
zv+r(cYHB)j=FFEb6@SGT7#f1-%H}*e3@T<XzuT2~xUKT@v%Nnab)PKz`}6sH_qm|l
z*QZ{_T77PgC8!v`x+-*i+}>YbUr(3Gxwhu!0>@@9Ztlfr*%%lYjti}Cs=Xdt{`LL+
z`oF)vN=iz)_lb*&o}FR1*u7tlolmAiE1L8D{{MCR>;BF%$qd?>b@j5J_1iA(bu%oB
zpI!WZ`~8iL$wxW_pU<n#3kwTd?AFW1Be9|E?XAWCb5mnLeeIm1FRX7=R#x8KU4H)5
zsj4p*-G6_3JKN@oj%5DFM@PfsYrjs9ulsqynIF^veYGU<SdXOruZ#VBvQ{BUe}27Q
z|Ns2|Kj*!sYN_7OySK+uUH$oFe>=+}U&!!CU(JGY$?1#?6SR;k9?jroEdpwOb1VuT
z99R{)TF$cc<<jY(VoI^)=H_&F;ddO_*Vq01*k51s;=)2utLA8zC@U+geC?NuCnhRi
zzI=Ip<+GW;-|zqb;neK>eLpAr+htx}c6O0#Hy1ZIsF`$buC=(B*tfU0zn^fQ2<coF
ziY=CXv#a!V-Os1fqqpbfURdDBp{T2?`{z4f8AsyrKH0CYuC9*TTlHja@r@0M`+pqO
zU$^Vksy%!5C?9w4l`_q|v?T59tlT?03|l@to1I_w>WZeZaq-_@Uwvnpc&^jb(b@B2
zQTMgA(dLz(o>;%zvG~nt1_lpB>l;VfW#4qExLHGT$dwg=i?7^#^X83!Q($1=&NQvF
zyWj7VcG~grn6!7#{hH5b6<d5~o2l|ASO0ps+?nHgZ28@Xw`817oH!w2Rg%H+uzdgT
zSXftl=cz#UAFJJZrE+g=$-KHM6jZoWe0eiT)!T_9eNLg<&gWV$_kO>pU0QQ}U94iu
zs?gPH`)&7?yu2iE=;`$MxTKq@(_^R2v90)!@buKw(>rvI{s(2sB~d+P8+Mn!mvcIC
z_AI1`4G1vEy0SvR>H4}@Wsfj3f18gk91l-vulKokXWpK`J*lUssZKM^zh?s;6VZ*{
zHl-HanHOKY_F~{-x3jZMv)|p>X<PkG#wf+3sNn1@({3@{Uq8>+?<;(Ktf!~v;o)|7
z=}!@Hl~1Rt&#O?<)qM+UM;AR=y?!5;;+0jQtB;&ItZZHWF6Y*kOv&j}Rh^5BWf&Ta
zEYx2yd@!{?R=@hz_WbyH^)WqMW!+-B^D3Xsw5$E~<mppcJNq|(KA-pBcjC;MIi}fR
ztHahV;;!(?-~ad9*X!}~Jyhy`U7jzu@8<6Z2b&dJp1ghJ?&a;hI%ug^U|`^<-%lWU
z<IK?&3GD*AN?&g)dU}douHr$fc-)UKm;EP$TH^0-Zhrpa;^Om`>V5k&Y96+V|N8cJ
zcmDl-xAS)Enwx+BP?46F=ID5^k(u3VjX!9lXikktq~!Feu9pAv*ccv&OnWX;_~=mo
zM^Vo`TxA>!D?dLAUhX&7s`S;nySqPs{=9k9rmXAhWJ^~h&$6lfbYh}%cXzjYpG@Wb
z+V86SYv+b+PCMJw)b!`iA0c_qd97kj9{<WAN!?I<cT;UduaxPe{x@EEPr_CPslIag
zKfjrs|KE?}_VWyr-KJ`VF7usTb~|_b)QAh4QcvqfZ>#z9@wn=-$;ww(g=XK_ka!b3
z_*iUc<!Y08b5p9Ynvci6d%H@r`{itnjEpvI+O%%xvsu2g&0J@{-SGGK_xk_8@6R_(
zcKiG5tGG@?LCJ1D#Vy&_^;$L*JUk@y`2C*GejE#PZ*QBb6{-ap!qRhNwVvp~BWJUt
z_V>5%cZ&Usp0r8lRs4K9y}P@cU(Uv&{N0^7bLKpmdHbT5icszEZ?!L%PCsY&`;Dsi
zG?~8~ub150Tiq@&ukP2&v$M_R<#cp(YMxAVcjBo1^~G`_c#t!uhwIy$o12&W&!0DM
zUfpTk?JWXg`f+paT;KcsUUm4Ih{BIY#s5Fmub*j>8Fa<)uzuag?&tHW*ZIx0(%=85
zs9Q|8Z|j>+r}h88E8ia-5O85@_Vw-es<Qw8`@1@P{kyBHukR{-{p$L9`-}?<grh*k
z=owg3u=x48`X7ho|2?+<_xRiGe163q8Ou-qe!pKIzyII0wb7R^Ul!MkskoUs9n?}<
z9lpNL;K{$gzh|3dZmRwL?eUtM+j67#R(*Biczu2S{zqNf+w$&KJ;_N+d$xN0zFCte
z7gvCX;ykA63C~sXi_w90FWszfXos!gP~?#?Q26<6>GU`*#hA*cQ(wM(so27;zsI0u
zL*e6NCqA_bI7MvBx%u(-*NLFOOg%mA?~mj5cAw7}uMS&#Yft6pe}8|kkJz}#C0Fk;
zcnnO(&HBd7nKN6#;OeT-+q+7$7rXU3arE1MyYb*)vopufpFg)|T|IT;#EtFw@mD+<
z87#KmJP7an<lWm-$?@=f{l8+4#Dh(&Cl9{6yIVb8<@@&8X1P+n7N8!$ItkG??`z*z
zE4FM*KCTzP@6W&A@A(x=UR+ohxj9X-Wv+F3pPa2#3n&IMoS7IH99AZkbpLRduPym_
zRQ&y&oyI@Sbaiw8{rP!!S1Gq*N^0uc+uPUI{r&ZR|Nnn8)8|QM`Y<r~Rptq*+uc!$
zn|LIH{kQe~pXYYJ-&g&1>vgm2YdY%c&woCj@4xM}fKy`P!>QqMKabo03j}w5=4rXh
zKKNh!Zs+rV|Gw|Hud4a{>};>Jx!!*pP@~(;@YX>)zPC~g3=K1^O~J!EN9KTgyI2PX
z9eTLFwed>x$=OJhg66X<Bn`E0nBOUIzP!vgd|ixWsQ@TA5~nWa`w;&6`uh9Z^W#51
zJG+<<G}EOHiioeTu5v5BxwTar(Unp6ods$Zg1WTxYrjRpIxa87KAxRz4$77F-)^SQ
zHp{I#XZd_i<+GVLZ{Bp*|9$s;UHCspclMG{r^^+|ebc5*3!9euk&}nvfaWTAt4T98
zdSY-$PuYh5kL~{{zXCN|3MM6mYycIaus+O1#dP;u8xjxisr)>z>eb38x6j-Ex48uh
zpMsi=+Bb@yoyokrt8{hP+N!UwuC9;Y|0H2^+F3SUsgQ^WiCoZFs>QSl(Kox_?OI*`
z|KIED>(8G!VNtYa#fpq$J(9LnUn~k9ID~+mYLOJO;q#|Y?{>f6w`R?nna1g$NstpO
zTe-!rtqgwNYksfd%SHFaUF@Jnli=be+tgE21P&eP6lUj>iP)NTRe8DI-Y=Kf<!cOD
z4647q$v6aRZWkV1k)R*BNu}k&_Wb)n>D6Cfi8@8>F3a7S25LQT>I^)1Z*#go$3nMW
zsio)A($m`oy1KefAvz=Atgt$4tyZbo&i%E=B-5v4tART9vn52|yt})*U7)DAxVyXC
z+ecJHq({nB3!I8MCV{Lr?+iRBXPOmKR`%`A&f@BCZ+<@N)>mV{z9#bXkH`J-1qWHZ
zr|FzLaYDkbrecdCBLl;NkjzdOrB^)lQ>RbAzTAKQpFe*VFE=taPCq;A>hALQpZr-F
z99S=>COKXae2~q3EX7^6aZ2Y!zA}!5(cAO1!`Ff8-^<H<`T4J{kN-a{Ixlit&P;d%
z_P59G^7rS?pFe-<)T-FsWuSDlBH&GeuzcAD?MbHs*){IYGR-#2xv?Sp`nsyGujW?2
z+j(}jdB2q)0|NtSB(1El@Z<WrudA!y?=63Ptk=D7d+u#BbMx=7R<G~V5C`?knk8!4
zK}EidMZt#$2c^yP)&wm*Rr!4G^@Yyu#pf)S2QBqFJzf9()z#NqIE5EMI;h=&2mhVT
zuQRUx_U6l%FZ2JtNx!{4KYnl3*GEUYLEVAc-`@%!A6vR~>D}V<w%lSm9af;wpQ7Z(
zT77Fvree#-$H(v2e!pv)ea*(E?pEe<-x&sp;10&g$?ERY_(4N@x^BxmuCIwK{{HUn
z?QOZM!`I)txH)=TPUf8*g?p>MZp*zL2FfTaJmx6*)mXUhQ38eNRIN}aj;pIewM(~!
zt&LJ_S+r<T(ODJ-28LAO#Z8bd&9gHzgI~S*^YgPaN9^u0-RnM}^1{q;m+PO+=j~3P
zJPC^IwDa?7-)_AwXSp@!=B9bJ)olXPbRsXUiQL@5#KaKbxWjeMkAm(BSE0pCwsn7g
z=<omYY2$G@Z7r>=%gfHDPLB=So_DvYsp;wI>HEK4i#~ev=-XRcvu|z5?DV(&ey4ci
z5>|!>SC&Et?iPbb`Gn?o`PVTp_=$3veO#Cjt^zJ;`0f7`TmePn1C^sI68L4UOknxE
zqzhC&&)G%nq_V;?*6PD;yq}+)t$wv~`J{aN|9?Jbd<Knld_1}?;rvYF<)yE$t&Q6H
z>gww7{dIpQHJ_bjDs7SxkeHab1)Rep9))grzw7lnP$d+<-%eTi@rjR%7A=xAPW$rW
z;^PC2%z>$(p6KI_z=P*bpWgj`-|tPEHl>}LvorhpI_2eJdNC2p{pOnAt9X3TUA|XK
z4wN+ApRQp3b9IVluoH){x}S_iK|<$BZ~eVr4zkPd`Fbrn{ro)JoEsZ7E-`*6Pr3c>
zNdA4h4d3*S%I9nMS#j5wN11u--w?U9aeC;=c@xD-#Tfj2p#9NT*Vg`iIz2w`?yjwl
z!9M#MU~2(D;}%=CY!Nv0u)@0R&5H2#abI6uEq{McHr0zkYuDP{Le28`w=Xx)tGEC3
zeP!a>Et&Rbu58?Ii#}*{S7~4R`FR3Py;7!LbI+bT$EPSFDmpb=jp5%vlu4C8H<$~$
zIs*^pU0D&xk$81g=+bj~pyI3mS`_{Jx8~20+=oXFo$_s8)W>k3vorAEyL)@9ITX#!
zx9`~T#NUjSm36UOFPCD;+gqkhDGYyXH`e<4<ovkvzKX%m&`|5fVz=H?=g!TuF3&qZ
z&lWUT^yI-T(`>o=KOedEb|jpfq$(*XdAOaw-wZUMIYrBDdB^sQi;rfe&wF`!xqloO
z6n(&Ghl6U8<07CTNT=<2cf;1jSf-qqu*`S1oZOoD{r_~g-`SLNbJM-O)$TJuosvFf
zH|raoXJ?zww<>*gWo2;r`+IZe&!0baYUu8=w=0~%rD?}?Xg}`qa({lsD=PvY_ZXk^
z*!KG0-{1THe!Cs$4oW%6UEv4ozTHeeZ}a)g!Dja967!AI{f_s^*8lrF-+dCOPTm*T
z!}TpXe{bnwUh_A1cW+NWKTmbpu|C=8okdRzA0N|=*q|^8w8o;K#!&0VWHnze6(Q$#
zzPaZ4ah)sg+_}RktkxnRSM_3{#v+KT0uRP*$q3{~JklY!^c<gr!GjM{3=9k%p!&hK
z`df>Dlvz&1tDI&wUMEmP3TXt)bF!LZi~Zl1{!7oTi{7sHryMjI(6FO3@Zi2zt5!R4
zEOzhL)6&vHjDS5qH&@vMG-7aH#f|m%x7+#WPo0X|kl@(PC!2O|&d-DF@}M5n)m5RJ
z)6Skcd2*(8dEVBnt8TqgqNSk1Ny8PCQ$k-~TMNn)1rH89Jw5$?)$6sYX`3NqB%jY&
z>vv@^Ffe@JP1@2dOL%l7`2D@Ti}^q?y|nuxUs=%QWxndZvrIBCEdh<hIi{thRaNb&
z<7QxBU~un_J!ltHR#pZsh!$WQH8z;&A+tB_-JP9J<v{~Oefn;!)yMl}Wv$C%HmCU_
z26CT2d$ulevzo2#-(RoS?|(AMdsX=QdDEtem1=<M<;fDFZ=TPq)@v#F|Mxp+Xgx3;
zJm4(6n0c2y-3O0B<Id9N?>@Zd2MrPP+xHlNyf$Zogy@@g`8o?&IUKT~oljP(6jZ@j
zcp6$gdE+5%mIGRl66|j~wdOZyL}@mt)zha>&WMugIevXDtyzZ2pp^@JG8O`%;6_=W
zgy@?+d-nW(w_9IZJNwz0nY+v0S`}qIJk(nH>PqF~UURwHFBexl0;Tf8BP$a8XBw%t
z7!*D_(#Xsnwl?bP30Nu1qj+YXt@W$|(Aay7q1FxWX*!&WU*6u{e&uG|-l|rCYwKd8
z*UbdY733s^Z211>rm)kBz{PGWH{ZN@6Rl>wyCQJ0z@b&4tG(v>zRRCqcuW#0zf^yJ
zr`U4m&Yee(9tj1b6>1C&AAFNSHn{gnO`SW}w*1|ldA8O2K*MndK`n!SKOT$66datS
z>TP6X)W$C_m+JvaDxu)~fHI`;^yyPo_Vf1t|2*kFbLLFs=CrL@SG9IEfV}2oc<Uga
zV#(`kxld0`b#CLyytU=!!-K!RzP?{{TKDuc-Pd<_m$!f`Ig6w%&2z8psr<YW(%b+3
z?(WZ@KO-X}w`O1Ovl0O}m2QZ>ISiWfTH{^@ng_ZAng^<Sy*4%S+Yw>^n%$P5!QUfC
zS0tRDW4T%9G`N0F(r{x1&6&M8Xj}d5&(HJq=T4v2*3tsyH_PX9g8v-^>5&GdI9cm&
z5y8>h^WNUr`1r@i$J6y<qvnC8?-ukt`Z{A>a^{T<51sjKw<I6$dprdcZ5<a@25xZX
z08O<i?)h{|8!|@|G{=bb_i6q8Jc>`AKX;b}B~GuxERi?P?R={FeD>e(6fZsp>bNZJ
zxTu)U%*JC;WiZj>&E4JQFE1^fU-v7M<Ked3dD`D&Kn2R#q>v5i=jYY_VcmYO>h*(z
z&8OAj8KL_9-tAXDo|>wCeO;`zy83Zv&_cJ4wGyIlE-rR=mj$hU^SiXN@C0bg=}1gZ
z*@pbRU$0I6aYfkQX5u3mP}DpEnSO3grO<rPN&|+DW(m<Zpb6ibn^GSi>s=kS^;H8S
zb5T(ds96#aup#Ma*Kzs!n$y#C|9|oSe`RCx@fFFSUZ*Csr?f5UXxE%Mb1wVa|NVMB
zKL6sPqZ5NcOOmGR#ex$4x7+#UFD@vqYGL?e`@(3W!YhssTUnDgE$$0Q+<B>!KfKCS
zXtDFTce`G%+gttJ?%R!I)mx<;3(MZ#>bLpi5g2%JhGDYONl-C<TzqlU-hzjR1VD2e
zUUPG=uZ!hK{P^hTQ%UeFiG~~N?yRe;6k9+eZdSi9y34l;tO{QrhsaC%JD*GvI0R}7
zehDco{P^PHVrP!nT_u|DBN!MM0@w_-ZX9ms_xJYx{Wib;_UiEU;c=CzzrVfx{r&y_
zN8S3*&dsfUIyD@WO(Z2HL9;+t3_+a`5yL8<@acN7YQD2fvaYPC{QT_fZ1eR;a=yO0
zI=|}G%HngD>aYqDlp}cL?P@+gI$Hbd%gV^j&mJCR<CWr(GRb&)YAPrSka9%HN!96p
zp4b0NSDJWZWAgEJu^``pM#T~J`?o~-+AkL;sd|H!W~J=@_viEZ#eSfKcU*XPlWqFB
zIoH-kZ_m043WdMFzIyYSWnas=u)wjkwe?~eGXq0Ia%cF#eXrN;{`LL+{<^=vdU|@4
zk9T)<rJb2^aj|<pGdrKlRA_61)mmTw{l4GtPM<n8%d+^{y4c-)bE0+>B+j)ey;t$L
z*ImBWWK{(yZOIsF-3VUh!>Q;q$D;7TfyU6)VYWrDE_v(MemKbP%&|HBe4iR95z2)3
zaD4+0&DQ?@rdzr#e0`i^%hv4c(-7T7aC0E*%8I~OIb~&K91B;hSP^3k%KbNwtVl4=
zx}wptA?xa@p!CVVZ-a+-;XPEO_S}vmA&!m*R|GD00`)q26+oGy22|$-E_O>zPd`7`
z8q}cC-}fU4JP_M&|L+H@c+7^BlapWrv7kwLuTaZZ4E0wRxpt?|t6b(k->wo2P9FSx
z-roMFlCg2|wKb7zqqcVGfu^=(f_us~ICI?HRSIe&O;B{6WtLl2v7zqoukCk=PH##*
z4O&)~2(44zCa9a{->U&N;+Ff}-CJE=S63Gn7WVem*2R3R3=9nS70OtDZ;&$0Qjxzo
zwZ<fTs#fSCNEC~#k3QneaeA8WY=cCnPhYRCjjsOkV&cS!6Q@osJ!^VB=hha@Ti}M7
zj-l3#PM4$)4-SHsa6F$^JxSU8PQl^R`uk&IVro7-IJmR;`LicaYQA0#zt{$z`vVoE
z^K7eseR=uv<;(4NioC)3j9XlfNAXVe`@Py>Yj*s2)cya{^!+}wOf+B30A-&p3DGz4
z|9@RqY&khu-TvbdVa1m8`L$*(0sHIzPE>Y}t9aPTZ~G;H<6*0KTnB8zkhS_~m#AV3
zBQu-I{e539c`LSPuis;2Yirw5@UT_9O~5Sgjzpv#Xb@<dgy@^v-``pVCaJgQ-`!RE
z_0?7IteAQ_Xn6ELXfpH>Xr8q4^E1U3`~N@BE4Hw5i>a{hE`L9-X5yXudAGJ?nmhy*
z`8$$AHthet_kF9tu|C<|H9tT7e!t(o<-$Ve_O!FJS_R79-Lb6t@&Y`%znB+PvMhzH
z?)q_yq!Bi)9eTpti~1PiqYc!oUvY>|)1LdIU@3V0!@qOo`#!(h{XVYx?bfriOi!OY
zS^4kh^T}_mS^^>>DxOXa|M#)K{>nN=28NF1rvll3{GU_(Zs-30f8Y1Z-QQDL{P<Y!
zX?svteM;aF>l^KSvRbD%C2xIgR0qme`*H=vK`SWs|9m!E{rk<IpP&CfZ2!kO3*75G
z*|{U(`r7F2!OQ)w#}uEPr0RX@$HYgE9#wvRR{HkV*7Wo9-bu4FFes=g>D^er@7F74
zewz>N_J1B;^48z`Vo^6}plPah__V6W-|y|Mj<5gwbz|~zv;2E|!q>-v3J1`Lad=$i
z(m%$`3=9V@CViZ-&forT$(<dAo72vEg}u(Yx(YN*(kpGgE_C&^gU#&Q^Y8D=y}hmU
z^)*pZQPuSP-EX&DT<mV1c}eBf1BMU>xqHqv6&2^VAD`-1v*VKXqzI8WOZx5qRs8z$
z64d6nwKdy27BpC2_xIP|)BFE)TOVfp@$JXP**`uce}`+55Pfs_->2#Ob>j9^oSvq8
zdt0uymX_7qEtmIwp8I}F{{4NGpPvP~GcS1bIobT<!{hJr`=Jr_v5K$kl4Sav!oOdy
zyUzvH+<p#6+UI}@prb^uax~NejY3CkOgg$Tt-HJX^QTYqYCfI()B`G{Q$xU|%C_9w
z0!~U3HA=r-Sm=Crclr9*-QWKIng4%tO);n~Y2LjlL0nW+R77No#GReR&o63scX!8E
zzuhWu=-=Ppr<Fk^+MER%+~3y4?vC1?_xI`acsa|6-DPhd*DQ(MU3R|+)XcY#46I|{
z9k;jY`@QOXN%8P?F*|c_Z|k@F^<ujeD2e;|9l3u{&Lrc)fkx)Z>y3?#ojHDgduv@4
zSyfeabyetTQ?M-^FZeA_pE%*MPr@kW#Ke3?28IVBLbV6&a$a3o83<p_QTuxB_Gzg%
zh5c<lzI_H-;~^}*xXHHo*_ll`!QiIKLJ83~ckbL-%>Mn&&Ck#6|6A(ouV1ku<I)n(
z=W~kvezJpVCeSp!ZP}X}$-cRFcWni=ypH$B?<&cxbOQ}T8g+fLypei+L*n6fxvGrg
zeX_?jL0Ma1VesOFb^)uhHxf>uY_teGP`uP4mFwH|_&Q0a6X(zOTY<V?QyyK>0A(Dn
zdop$Jc0PBT3o2>71Q$D>W8;x1_>!PB@y71*_m`G>gH~Dy9Ln8(H|*WceU3^KU)<UG
z`P0+W<?rrPetFX;YYoZ~JBy#^-P*D;Y;9ERu9BZG{p+i~tmizn1GEJ92DIdeii*1R
zQT$HT>$UrTU0tsgx+=u3@>Ir=4ncmK4-RXiw)W}Wm~Eba?&QgzPbT~K$(%WHVupEs
z-1BpD{YxYno-T%uKA-Hfa%z3*rEVAVQq;4jY{UD#-|qz-*A8D7v%Bo=oH=uL*s@gr
z|MwTPPA<9MwrmZn`Td&D>ho(ZE%TlI^78WSxwpR^=C_wI%enD-{r-E~^Y8cD{aWGH
zE2X{#<XVgFi+tZ!KAl^B@8|XUzt^9ho}QkT_U`uf_b+7MnBTA29K76b=FFL|udmO)
zw8XP$!-fqX7Io`IZO^;=<mBY{J0ACK(q~?9=f-s<yEoaj)iE!Rv}^BG>U^n~o_%c%
zX!$rO;s5;nJpaxP!&ernf5BEvkKb4G^ON6PE8E|1HZNUQiF1<oE~hSd{rscQ4c*<{
z%l+ryTjbjP`MmxA-}nFjJJ85n{q4=oMXudtZ*QHQZLa^Sq<(Am^>v%iS-n1~KELMg
z*X#W{$K0cKm1J@}{B&Ah->~jx>U7m~emR>6$ohZKT&dDT35$XU4-Ph)=iP}|>NWNA
zIqUV#?R=m#xzuawH2wH{tHalG3ah!y{g}PC`a5WS5<8DX!JOuslceA4MsL$G1g)_b
zIMm3@zAfiw5ZvHzpoQ``H>WROzWo22bo<-;YJb1Fx>{U6Zco|UTW@b~@0T{$)75=D
zulil&h6KkehRz%d{pMP|e0l%F{&$Ee-yaVrJ}KXJ8l0?VbO#=sXO<hqk@)AwM^Hob
z>?~9M{`qCMGKJNAB#hI1I1)cSIXUsEwd2X?-K#@ahb7tQ>aO*gss-*oLRaP|eVMUt
zYxeavfx0h?<)>O%r<|Bj|9g9V_4~cwPnlmidHT#59Ssf8Ttnwa7o~}B&V;TG(*<n}
zsGBR@us?FoJg$@Xjq7*T&yIO{q*eQG^woW}zd!wX4C>0=*<f3xZ?0K+`ith4xwG%I
zoIihF-*8{{^>t6WZ*9$<4x0>@eY1Mqu2(CU&zohOe(u4+=Kgte?d$6}7G_^x_jKNC
zm9059i%w6|m9PI(*!l5^$JVT?ppl_(si}eI_RnTqvEh@A?DQQUrvKi4Vya&bsCJNe
zo8UjsW@qK+XXSSaj~Ab}4NuCtxhWOY^}Mj)pyn6ZH)U^cwF-a+MU(zu*$e=kt<3%N
z<0ELbS8~5?SV+i~WxlgP%MbJJ?qX$SJw08&{`K1JUESTcw`O18l6hHZzSXYY>6=4W
zhi%Qgyew>OlxpAay4P#BuiNpc>&=@t|9{v2fB!H&RUE$Ur24}__WDng=Yy66JY9C$
z<#*JPqfLf41Kn3_faRFLgLdbPm-Z`v`g0uAZc|cMPrtaxb#3(ad3Lo`?R>IkIX4V)
zZfs!X7Q3=O{{Q2C`*Y{c>HRihIMpk^yOZC(tR9~CE|zbQulw<Eh4PZ1sHj`Q{x*e2
zMZ?!bZ-3XNz0SgB-prXlLAj>@I<uLi=f+z7;=;l}caM#m&)Zes+>{DxM?F6`cX#RQ
zYsY$}pFeqWr}q2Z^!c^lJas_1@mx|#w*|wIMrrp>e)GOMguS4F$NT$gi=Ul|+>&we
z+uPgR+}z%=pFf?}pI)rRz`*ds%~0!xb@{t9Cr?JAj8*l^+t;0)Wty3pc|Eo~ww+J*
z)s>aPrAnaDht(3IZ=MLR1<hevzu#m0N(a>2NV)`?8jRkS!>Pz4V_|@p1(Y_)5O7)%
zwA2eR4hCwkD7HMGUvGy{cD!F6G#v;U_W~~>gRF9NsX1|T>-D(mC%3D<zFHf<e;;UZ
zUG%n`i;G;t!meEeEiPbVU|`rNRCv(t8t+4C&@51*vt>JI;B2y3;la8e58D@mI!+9l
zf`td=tV%MrfO_Q$s!1i_Uiq9|O7{-h1ugfR+qZu~(bH3rk&(R8W;wUFz3nx>7qLEW
zZ_!Cmg>+xXjrI3g^ZPo#AA#m(udRu^yQ}o~w@(ZV3=TJsu1MJb{a*Fvw6n8}QoWkl
zc-eTRt}JwJkJ_5`^!aoB*j**ZdL%(hPwoGIwvR8l=n7gE7wm5fN~hBKdkT+siKfpj
z4ZCs>v`cOYWX<MV(6Y`~t5%=0|NqDQUd7_PySv=|-rn20yZHIJ)#2;owq#sv6_4AI
ze}5lnxai|C>BDWj)h`ydFXG~HXuX0w&$jST(Nbagbzizv+}h{(&M;7%=wb8c!(njG
z-mO;(w3z7I+uP>gRWxa7&yI8oJ9B(~e!gD;)L4;Gbz`l5d1<L)3uqF2>A7vNbrVl+
zo8{hW5irZY7Zdjq)ReDC3fTZ!n(g%C&F1r7bA9{5R|cv6Ec^fOuVTxYGiN?|fd&DO
z3omZEyRY`QfKy&x-pb9O77)k7N8S2<`#^I|P8_;XTQt^}fKrnUVg%sp`uf_-zUH8n
z5$$}kT#7O_6(9b7zb|i)aG*!h*omY3US&E*;;%0+o!j|%74MW>_Pv+}+FQ6pVy=>$
zQ^k*m?VuUqM~@x}I4$*>YGiEu{oUQ!HM8f<`*(nu|IZV3drrkAOP96^L~Y5~SoHLi
zl<B|t>T)^N(9-9Of5c3D&3hMlbf4%rAYd{xqx}e{;Ul&~M|hQYBos_ov7l3BQNuCS
z#fC!HJH0zRrbrkIJ3H7GrUe8zu!xkTcuIWPAmea(Q}Y(ylFKHspO0Iwyu5r@?yA}9
zp$DFS7O#DMt$M#!R^|Pw?{}+R%CgxW2qfnv7Paqe-EVR4<rBX5Uz|Pp<{soz{PV)y
zK59$GMbYq>!Y32m-`(9^e!NfC)YO!fm34Yt)ywMld&QT&7I0c0w>NL+(`i3HKli@Q
zz`*dNV^5Cwgmp`nfK~$rmzI82pI@`7^!2qxix#bm-M#FYS@)T_*5=VqL9OV$MS2PD
zpFVx^v-x;L*x$zRb}J}lyA^4G=iDae?f-rE{k|Jk3=9klJ{D;tfF{pomI!ZPWMDYq
zXeqMcbu1%jynA){`gwM>zvkIi%hmmOIN9Is=K*n0?)*K)neq3#-S7YHnQfMHqu^R(
z`rYdHd%4B+Y$m9JY;P;dp1JOP@twlsw*P)S1}%V7XaOxR7L%1>v~B^ZZxx8E|NAvZ
z9MtsxYLUV8ZCmbbCl1h}f^FQOtX?G}!&v?P-rkE3KIqr~JbiN=DAT^0=sbB&Z0*;p
zrP7m6K3NyLTTCzJ$M*eybz2Jle!VXLVHX1fL&E(cjfDRX`Rg-w9KNv7Ie*8)Hs)Z^
z_@0RuAL|=-xrzn%zU|qwXK}yXuO8!b8#q8i1(w?FOyB(LK26S<4w@hEo#M=Bo%;RV
zU2ZX*h@C}G`(&;AEFN{_-QD%|&CSgV7AV{<1$7>tpIX~6*GFxB#iP!R$;b0<ZdzLQ
zJJ4OW^t|o&J2jurcDg7<Zv_PgbCGt!{|D^$GYk@)Km#o=t_1so7W#wQze~NR=iT4;
z7c@V<HS6k*!pECpl^GZqt|+%(JNWPY|9|f}h1FDqI)A_0oqu_m@B2NU&uz)Pe6RNV
zUEkSee{b*q8_n_X`u=}kdnAoF#eycIuPC=OeGB%t6?Kv@PCMgY|Lbzt+Nh<sqPOS$
z-8}zq&XpB`D?vT}3E4#&;37`3Wp>`KmG`3Edn6{l0<D(bsn*U^R#{oevCz4l@9Vv)
zKOc{`g9?Qjhip*i;LMYN19|uN)p8`>+?4wDp43+P>NgwNw}7lZ;ls!J2E0z{-M-)N
zX3d(lWp-9p*1J8Q&j}o|d_Jf60XL}cQG04(gKgE97h<|mK69<M#_zA&oObrs*6eVO
z#OLSc8mFJLdB5lLGT+(LrcXDo`tl-YI;aQr^~A!`4gQu-r*I0Z@kpEHJUcVfIQ<-t
z;*+OOL&L+}w?BLO)OUu#!X-;oR-1wbPyZIZ;3?yH2&#as-|xA+zy5#5%yk;Epq4_-
z$D^RN(BE!lZ;n*}g|vSAxrFsmTUQl7KX?DnGxPY$r&Cw0T6O8trB9zeU0WMn{_4s~
z*KV<!>p+`$q$W8t{@!$2@Ai(u$6GQldwF?%`uur!+1pufyBHW46oM^9HiWN<2n-Co
zxU={<ByIOjbH1Li9aP8$goRCe>jcVh7eN{B&xgZjXPfVTx9jz_wb9exMu8*>i!wSY
zGBaPQPLKKY>9qd*f<v63EyT<H=5}>;y($B3zvQpUWnf^qutA3FM)I+q&VPZy!JiMZ
zgSMcs-&_kSk&UK1GydLqT<&xkDC91<Sc+_L?~|GN7BtkhptC4rk}+t+r{d`a$u|N{
zuC7P9_4gRm|NEmGy)7gx?Aq=0TU#>OcYxXvH<rtA-MGCy-~Mrr@tIsU28II%HQJfJ
z?fri5_d#}f6`@XV{k>b#&(F)fz3pt7D`>OgN*S&jpt((Fj((d@CsqV5-V_U(Z!_`Z
z^DWv|_xG0oXl4GUSWqX?B#Ms}G?LN|Dx@bOf`wnk!l32DDed(sy9F2+7&5lVaNPh^
zD&ODT&ENa=+RozVX}iTh5_5F?i@x1`U$_0+{I|EZMsG|y8ks)#>e}e-da=8<WL;hL
zE_Tt*3!q3@o?@9$f3M*~=;|=gVgLqlP%8JG_O#*e>iEA}yFtZ@gU%+I4WJxw7gQQ4
zh(*qOl5=vBDriAW9;l_U;s2%@w%zXia*8b}Ml<h%R;ne~KfPcS3#!*X2!RSs1vVy7
zL2|_50H~spNU5B(`2;AWSW7{CW@UAzZ|Cd2Z9Zeo4cb2PWWgMc8@AQo&Vb`+Lwu1^
zf_}sXh1;s2)eCioCnZ7KrlCf0fHVj&GlKTmCrx<jc}}kS&BiyNFksktqNp*;k%2+t
zKImM4q62c4MJZ*V5O0W(Py(e_Ljf#qLorYx0aPsWu;hTIi5nY0z8%%nL7RYR-jgHF
zz}|cI>B;-&*Z%r2d6wzy>`k%U?Q`?q&Yn8wMvcdMv9;Vnpqj4X7w3%$6M0ilN~?vv
zS>cnmTYxWkwb`{bw<11&`}B(Qy6CAJ3=BI@3KX6=5uE>K+PagwcvDZ_KYx4jTP+5L
z74sW-C4Zhi-qt!<_}{0C_ds=;!(W!16CIbYKj{dS{(Fs?VZ!3!9OH9^x8>cnN;@+H
z)N%l2`U?vWUk#7{`><U;Z8xY9ETyP_n^nNe+xxu5<DP!|e-^(sFIw~nv=7Z&f3HdD
zt1I{RR;O<VWoV-(JZ0Z%-&f!N{o`>zs0RA^`FVI)7^sjbe}8Z4)TzyEyjuGD=gUeN
z7#LDq4xHPx{azJl1$t|1D`=2+`@XMhr|Cx5eO(<7T59d7K6gvy=V$5jD%EbYGBGer
z5M;DIt>!;Z#%afwOWyw<%KuZm8W9j+kaVO&;83^zz8PCjfeJewM(fjJdNC4C7N5@;
zU%mxeH+VNHHg<07CXlQOqxI=)>tdyyUR+xn9aRn9u@o8_`uh5M`v<O!3=9`Cn98Qr
z{(8AwvBfO+){?EuqqgPWuUi1B&lh~uxjZ%XBFBemy3xzFsta|B=|;V|xf!%3Lo)Bx
zmdk$KJ1rE{=YA{Nrp>@7^1j?creb;h!h7}q|1$5iPT#P3&4vvIEdimSQ?JeQQCplb
zcmDkOP&cJ-*!<>%azAKLHGO{Vv}<u;VPbzDzgXOVt{|BG$97N&msQMWeR|Q$v)9%{
zPQE6$bxY38Pdgs>opu8C6HO*H%uNN&csO%_`i@b*_f&p%;;^myl5uZOrPwJZ28IO_
zS>EVK8zeY@$77Z*UApqvsZ*ywS`=Hf*X>Zso(l511XJ0xxs}gmf;I}Cn`?dE`u!fu
z;%A^{%FRuw;;|(cKRrF&-QC^JFF)@s6R7d)p)}JcZFk$@yIrE%pf$_9(q>okK<m99
z_nM!xc-)f{%F4jtu-Kw`ZtdOO<?&lGE`pMc-(0Jw&!2-vE<Zdt=sitmB4~brPw5<_
zbe#h}8AXDWg4e<C#;(%W^K7d(#j^UUCoup12B~vFJ9_KQf`WrD?=FAuqI7X(@bcW-
z+pgYzevn<>M@8tkTs3Idi4(`?XJ@@tggRXoZQ5k?S(aDQNM+acCRXlm%M%nXeNb=M
zZg%tO*O|KC@7LEWP2BPC*Xzx(9(rAIA)r~(<o7?$7@seBf3NoWxw$K+f%Ys<^oS|B
z=(<ttgt*56wr{zwuB=o9E%Pp@Hv_F71Z@S$xV`P|{eR!KgLeFuTA!I^`ug4O_kJou
zps9q9$K~&D**j(GRM4KJi~B+Iw#%k-|A{?3pW)uEl<C*i+j~x(I`!~yd;L9qM;RTK
zoP_(!{N~=;mU}zD_I32{opbwStwC$X6<hxQTK~W5(@FL0`yo+LRv8x-?5+OZCujTX
z&*$^`pY%Z64Z=b~Kw}m9`uU%poQ&R<bMySH#@afjitNd*ar0$v<?Z=+EXC+2sM~&1
zuf>P+#sNE!yZ(N=egEgV@>%Bj`~F9R(v-=ZH`{LKMenI7yb=)_TKe<p^!$5!cBY@7
zmwdeM?)<l)wL6L}iHF-_OD?+ZE`R^;ZGQdk|E7l@>KyiG$j{yMNT9m*+0&=0LY;qC
zvYfDDl;-1JKEJL?Y2pviQs~_uzunI7*YjR|HzF)-+Vtt+At6(Ceardv<>l3@S2GvC
zpQaaEb=LIyoPtA~KY#u_di1DxT*X7sa{NR?&=e}D45|M9ZuPods}?O%YB8w#@`9CH
zY)j_lv`b4ob)&Xqd^-H+b^QNb1rHA;_gR7_R4)b`;1pK7a^;FiWphDB$6w(EarbVS
z^j?2nSM_@BcD-FM7Cr8>URV11TKe45Ydw<2_v-)ubx|rjXZhSk7PR!vMd{;<#r=LW
zjg}TaKL;vLFD^d5Ds=Ut==5`QEHf@F@SAHTDk8Ea@9wYF@qb^*mfr!LnXzT>nK_o9
z|9n2Lucw!HdD+>c;_-WOZf-iGeWpMhv^>}_$z^-qT`n%JTU)cQ@2~&A{eIo<z*@Wa
zr=Gtk`nb@!-L1Y)+FWm<hs}=%&9lw(_kBDj4f6e>MT_!l9!u-*{StKL;m+stv^6y~
zH8d>V?RebAFTbz$_qSv37IB_G+##sU#m&9A9yEpD>5_DElIp|abqDV~PmK5?0UG5>
zJv}Y9^lGTARSD=!nCW`4ZoN{e9KXc%IAz%C{(YX`F7WSb{Qs?GkInc0Jo}<J;q9%h
zE04Xn{dAJIp66B`Nuwo^o71AV<#=wj`F^MPt6cY=5AF76Hq2|~7Jv5qdAmT~-mllL
z$5rd@`aMN6_|mSkbFIyHtz5K7Y4X{BFWl|9z+)OGQ;N>dvpp_Xox_p%>dH#VFxK`}
z750o$57zn$SJW<ZdHztkbh?K7)qi?^Dnj=EKK46vq|dJ{yYhVFaXD`Ah+OHs{k6Zp
zReyWqsWv(JSkFpxg_76T*6!J3vvs<=T&2od=fJ>=2O623K|8x*4~wSl=mt$7mcG80
zSv$)#`x|I>=(8PYmV_hm!-IpFrY`F@X0O|+_Um)_{$HUSiBC>URG(LobSwD#Ded(=
za<*2v;(M#UdgW>zj=l1B+xh1Ym(9-e+MW6F(b1DBMSp&L1eXa0TUYw<9bR<2;$G!*
zQ4x_PrN2*w@0;{6d-gKH4G%YL0PW~>I^HK6tecvex>Env{r~^Iuly#Rw?nbz!x`iA
zF7@y3?%uv*$B*0h|COye3>pWy`w_H%0yO>b{@&iQx3>(FkGaHu0Bs9vQ?>o`;jm)M
zo;`a&V?l>N8?krQN|jCtj*Pqsn)o{e+U~ZsN5<0Vtu-f~zhNEIo5|bFdY^usdFe*O
z-|PGTneIAmdOc=y)z??2PMwO}oL2i(JHGRkKlhEmJ5jsK-hO*~d%8~KBp<c=Rj=0y
zIMvnt+fn%V+S+LG+J&HPf0dP$iY<kYkGYoD|Nr}awrRGQlZ08$jayr@wY9XC`Op8?
z|Nlq-MKfu>8<Ou9ZQ1hWgmOQR;+J=KfA9T%Z}plrplPKJL1iZnS6A2Q?RjT6_%CKE
z`~2+e?d|#hL4&!|r>~FMxe2^<19YHI{=Glvtl!7%F3XLKya^gDvpsX=fBt2rvbOWi
zCfk%+KD=B$f7!M<VjGNYc)I11ITr5N@nh%nd7!T9w%psGMWQRBU-=#pcRbK`*ww`H
z)u*SY7uzqsX;k&?jbv@=vR$PwFR4!SSQEWn@0W2@Udo#r8&~rE{r#O^5i~zu{r%nA
zy?;Tg^M0kRTC?WNl6kXc&9bZg<ud=Ds;cT&x#K3UG@iGL$8jjmsrhtr<+O|((7I1h
z#qQ}@z|s9$IAQ*tt(i>g&wC^@e_P<#?8L#$&bQ|7j?Bx;Ug%Us9mu=3=BE1mnxNh3
zCJX-dS-<nR{l@fqjIa}^YFcc+e#fIO$Tq^==iTLNMdteCStO+I=mzb4Yq_x4y?@oJ
zRk!Zy?Rd~6aOkqX{oKU)Zme$v4&B*V99$dpdiC9+)4E&VgBFRm38?wZ5cqp@OT*lU
zpC@=8IQPLo=c~l?^Yi0(-%gz#E9L~+_c%$vCws%g0>QKWR@v9qyf|-uzs9(w0KB8?
z*_oNbrt4yM7M-1Cx_oX~R6C#SqG?;6`mb8GD(%b+L8l$B*X>?(e{--~uT*GA$d=U8
z)5`DHnoq70>OAst`Fy#*yFtrwqPN{y-!2Z?D&oxH+{WX%bqW|<xpKv4hC!meIUoD(
z2s=ijhiiR>Wwxz9@7T<ynssw`d45RBDbOhs=Xd9qmX?}a`1|d4{`$LdF)=nZKR)dG
z>u>k-$(GU+b-!=lSDSn?{?8-v(&{#WMN4O7XJvs#+60_T-q<V+UhWsTCqF`MH;1B&
z{Yua%Ri^U%#S_0>Jkjd&%fxHB-`qu;uT8u%^Z%CPSGQ(g7jUYqtW4e>AKiBTu$}2Q
zgE!vszk+VD{JwZpeCv8G^)e<i8NEf$6Rtcw{pRN8?yfF29*KaMm_0Q=Kl#o!YZX|e
zfAN-ZCTN%^`FNkIs_Mo0OP4O)UG|nsaS2~QP}J5e(Z8p5Cmrqbc{S6v`WrL9jR0?G
z)!(nzgPCv4N_%^2>*lnxS=ZJ?-rA#lu|!!|-EYmRRk!B9J$z7anty{&MfPQuxcR;x
zPO8s$Nqz}B`)c{!koSvHGmVd3-IjNEm-+pg;zvh1x8JX;-e323(d3O6ZE~Zx<#Zmp
zaed#nty`-9ezyN#{OqdPhHimFudc3k<|vi-$esBmbIKKs?;crA8~bFfujTdEwR);e
z-uPzA!{>?frK%;w{#pe@o4i}FYuB!o{SCFJPnR(sV7#<F|Nfspe{OD0SD$=x)22;F
zyT!l1y}jMFTkQFq;&W3pgF$Q75)L%Hyu7?!KyTlVM=$hVUtb@;r{d#=#KS?-oplG#
zo<5y@byeuySQl~krEC_a36GEU-v52?`?q&@t+TGIh}~V*D{ud=&-$Ik_dCVaUtV0?
z;%;8}=txF{-(Gfs+2;B4j8acMIM_UUTcHKZkx!f({O{c|Ih*+V*BsFDXV1w&_0OI@
zb$wiCe6!l*cS&Mm;$78Kx3**kZ?6vw47_^`G~z72|A%XdWX;}1C38JgUY*kui>=w`
z|NG9-b91d1XFgoKxI^uG<{ITbi$(XoMTn`c&7ObWSxu^1BCNb^^NZ_Cchu=Gh&z{X
z_@PqN-egeIbGqJK<8(h?-?KZP&y#ktc)#bf6G!p0GoYTuo12@h%iheex>E7=YWU>(
z{kt@`P84t|EiK)9c&6RoFP8-l86KCpT%c-dYTC@s4>~{v6ki*YkAv2k{8?WAYx$~G
zt5&burS&;5C{x-e`u@^&%jZ?C;*4%{cGH>9!29{yvXgu4W-D-=*<JQ_RkhhC&<>as
z?#IV^cfZ+mT1+?U%C_9w#pi9yZ)`{eErfo(et+Hd*z%>nTr{m_`dqu4R-#q<zdSG^
zLSpGYwzkCdOX^GWs_tw|cE6kVd-Icuw%r~lpH!Z3(P>YtxyX`Z(c{9dba}adf7?S>
z?G(_oON&ABu^!Nj{+>NHxnE*WPt#rec(w;9XBmd9-02Wh_U6XMl9!h*JW$wjqW1e;
z^WtY`7A;!jIav*~EoZ?3(3anfo130~dU`s4-_K`iHtSwqUOs)=v}aGB=HA$_@Lh=T
zMIIB-IB@*GPt(<MWcVb1|L445ke~al{d#&bXra-91q+rfQ=1jGc3UgA_`A>Vu3o(g
z+VRTq@VNcIkH>nY)6dTOI{W^gv@<gdYkz<1Js0IJ`DsRSUnkS;|Nq|qUzWS@UES<$
z-DziMxh~!+eIr4w)`{cdk~^i7>rM99i22@dZ9DI*mSoBExLYmZaH7ht6S=pxWP-+R
zA9X12``DY`BCxCE<)x+G(^F4RbG6^H-0gm7Xz1RL$D~1xm$#0ox=~v?`qGZ|$<8i2
zc>&Zx@9OF*dwc8ZUcD9DW*Vovh0ECfd@@;mZb{IUidQR_yM4~}n`2RUYKmrWZOnPQ
z-#Hu)L9PA!`)cn-v-3z?SQoqdfsc6m-oO7@G8(p<{XY46^Twzx8G^oi-+w&rzn-VB
zuiu}QedF2L+1qn(i-EThEWRM|^T*@<|6i8dw+bxcJ`pUu7Bn2cwfMocwbB1S+y6J7
z7TY55?`ix$)fR)gKR>oOchCR#WqGHI(yk-6lkME)YfCs1FD`Q3`a9>w2E|?b6hNW!
z?7VZ<y-nwLYgPZ}KWrf&W&7TuzyLI~Yx4IQt9Z<WET7oJVG$8KvahdO@v+zZ9>>?U
z*?SWXHof??1hmdEs^R0}e!E?7x7}W}Fs$G1R|a!+{Pw)q5{06tQ^S|t4G9juygq)v
zz@b2Q*{>Jf<z0Jcu9><CRC@~U{qyPc{OjpgRs=#8AZ*&icB<L=-oJXLE2pDpM^x0B
zmCamth3o3!c7D)Sws&`TKR-Wz{;X=9+?ngPT-~#3)v84wkNy4i_4oe&zxSUyb;{rF
zXUd-+A1?~UE4J*b{q1HOzBWp9t|`cx@HG(|tG~ai{QPX{lqoLvfAi{GzPcjTMO^gz
z`}_4TFD>0&{$5X4w-z*MX<?OqZVu1ynja6_FHURcll}GaxIE}otgYAMtY0pfyvTdv
z<!LW>J(=X46}~(D{Ja<YUSD6o{%+mVso`z=|L>ddQ!h*Z^3<Q7C-|r>ZZ}RnHD%SR
zRWF+UY)Rc^WW%%iyWzFFQt>Q|(tP?W8w*yh?@BR}{M#VqwByI4?%-Z!_dXuQHBnnv
zy>nCZn-lR{{l*mO{5>0&EqnI4EH*Y)miOoDJ3BvrdwctJS;Y3d*e})f>*MyWTD^Mm
z*1jS;(A>@E7mNGb1VF1Pwi%@5o}RAX|I7a8lgW~Hfdw92PlXkFx0@Ay{FT#G|Nncv
zzxMhK8w{dKclyt_3!GoYT()Un=wc@h^*IHHCi~k-I=#5HHG9?bC7zQni5)+6{(S$x
z{Zeu4)}Led{CL#;^XJb}>;HeA+vkVO&*1ug>(;hh>7{2Qn9ACAdqlHae}2C3+m!0d
z%Y41BiFX=X@(6F(d_thmV$YIm+`W7jO0~bfbk_aw*}Z3N_4jv~-4QP9_I|qs>d=39
za8TB|?9BP|?=LKL{{QbUXgS-*$H&bLQcu`^zXRIbxM$BEP=Dg9U%{<~&g~f?xA#<j
zez*VsKOXtOEo?6z9BjV4?|1g=dwZ+DR4jfkYLRpF6X%3u(X)3{)S4Y!yZzp;TiNTs
zzP!BLcea_UtLxuiUuWCb*KJu6wpZBqdf9Q=a?lx;d#k=?U0)ZQnD|h0`JAHrwckN!
zjNQs!fA;)&P%1n%ReSNg@VLsSe|~;u=aadyulDz%MT<ZSBfh=4`BjhY+LKrw8H<9C
zkB&OG@mT)*@%ZznPkolpW^n88DLB>xULT&7J@ZTDx0~sp>9<LfCN(uRJ$m%$U^BaZ
z)E14XJS#ol8?MjJ9|o;r0(H(eZ1}KZabK6%iq)$#+Y@CBI+sM=Vq|y_@Nx3|KTj66
z%SoB#+yE^l%DK+eE}OoAk>SCE;%76{)r2~?h{Og43a(u%JNGynp8;qXp3m-LO<MWg
z((6x7PTrn-dt34IbFAEAF$D)fCx!&x-j)m6JDDTS(XQ-#1Z2?W!pFxh``g!o)?T~?
zEj6*JEZWi`UeLgKgMpzSKucf0{@2Up>i+ZgKsq9Qvew^TUw{8>cK*Lh)Avnzd+C9J
kDr6ujrg;Po&^P~Qf4<?4U8-_EXjY2B)78&qol`;+0I|&4SpWb4

literal 20690
zcmeAS@N?(olHy`uVBq!ia0y~yU^Zf4V3OxxV_;xdUGTDyfgxa~r;B4q#jUq@IroH?
z?)_h%Q+8s{((ltIK3jW6NIN%Z^QBFzw{O`s_3o!d3up4MholL(Fbamah)ABXEOzo~
zo#Xr95yy(gC6<eoIvTo+I3Inlw?F;gYJcswo6qO$eaJBT@aL};Z`j{_e6#mj_WtYN
z_b$7u#mK<Ga3IZ1dolw914DxnGl=Tq0g)O;AW}pUL<XdQNQa3a)eBC5*eFU}L0(`$
zRtnLKtQ4Y|flQ^yPJ);UaZ<u*U8C)Hio9Rl+?*acT~OI=%L>jv9}e?R|GHDY{?ErZ
zAH`!!0?TU8&N2=C_;0#S<fL_bzuhva`0(J$a{u`z1rH9CoHf0kvVHn#>-T#mzuWnI
zUdgq{^ric1e=kcs+_rT0`+e4HV|Q;mT7OWk`pw3YZ#UD^=U1PYp!o7?c>Gi==}uAY
zu$22t?f<^?Kl|_2$5|h5<?s9XOzn5g`n}(xW|`;TW2=1o^=HjhgIu%a-&rMDK67;S
zC~anp*in!uWsu--@_Y5GmCKj8c8g8@Y84(|TYB>PkI(b}-&wRj=JYh((_B6e)B7&8
zC~e+n^Zico%O{il{rGLaOi)^U;(&tWvx!QZH+CKp`g9~=z3OD~nHt4U1AJYdeLAgw
z{@&@*>#@^wJtOv3l~(<HI{osEjmevD>+gQENiF{IN%i?><fNlZ=2g8~`Nni!-LIE#
zs_w{r-yV{*L2~}bN#1&%+TrWY*jei=m}MMp7BZ1hq4|umhEdm<wCI$3wcqdNWFNY)
z(0TKJ@%oScmQSa=`S<8|*Um>>+MCV`t1g?MVXSLw^T_yk`kvS8cB}NVPEle$p)kY!
z#-uKvHAnxwj{iT4ciH0~J(vCMZT~*iuV49n&W$30<9SEInk6ibzY8!tvF||>_a$L}
zo51`AyUI^b-n>7)PV(aoZv8zQ?mceWey>XVZpGu?n;Ut5-rxVQP5RQMrQWY?-d*z6
z_f4Nu=vMV~YWSwdiod?To-P}HDaLi#obQGADxas$-?@X~=-ZD~=WRaw%)PDdH)qD(
zxRV?8T6<(HjlRCRy87hf7&cz1DR0}tVk#c}%3MBoTI{KreU%S<W|?SyR<z5u`F76w
zz0T{1*##>1+<oTHC^NkOI6uDf=~UJJXwB<;+wK*=D}VU(+wJ`Pu<ehGkA8k+eDqJx
zdExole|~)28ePB4Z|<zWy&CiC|2&r8e3!L*rgeFq*rT6?0e^pgKflkq{{Qd$*MC-r
zo&2L!9%b(K=9$E9t!I}XESsHoYFXh&rd10?S$`hVu}ikRysyH1sn>Gfn)gZ5PgnSz
znf;@FPy55e?aKofyP5uq^nSZ)=K1T(_BlU2_xkJ48tLp?N3K2eDs$KSy3g#{_vM#<
z#=ZN#Xj1B}Uuns~byH_&ODoBrn#(uUXT8^^m3MenO-l_4);2OfVxm<(Z~E)Nr8hV2
z`^?eh<Lau$|2^n<*pcZ9Pqe34?#mHmI@;fRu<Rk@#_e~@qEBA$IWF4u|JUpF>GAV|
z`g>W!5*a%dbQ~AkcYn{0HTe@Yzt`R=JTCi{%bUgL{k^^3d7>82=M--Wh!%fTs?77)
zbXHZ&pSOm|$ENJDzwfGRpSNV=ONn_$Jej<F+c|~RrtF=dVam~Sn_FCD>W=Fds`J*I
zFqt0x(?O?c=M|IA%D9?ppTibEo&02GPqhwuB@}(`+SBT=cYju;M47vs*&SgZYHwdw
z8*^Xj@`Ww>rW$(Xw~qYjZO#AqI7vZT{YV5zyn0<q-1_s+Z%15SwzKQZv0wi}KHb#%
z88>COPj+8U&YAUIMY+yToOfPcc;|TNr|sG&7i2}cnI3DK;r8_0XD^#oD$%o&<s2uj
zTydi1Mqz!;%4A<B9vi0zv-9^^?*6-K^}1Ec#s5<4pG<V$#8@)tqI#ci$L-{|4g%Z@
zGG1@IFvGsS?%S-@VQWv#6Xssrt-o)_x#{-vtgF7f`0}V*f7$l?b=JRwdZf+ucH3U_
z6R|gA`}R>T`K{BwwuN(=|8k#p|LCy#&i9Xpxb;)6C%<*`WAXcE{;0G#X|3BGvFyNW
ztr5XO=j%TlWS6duVF^lRocO*>?DMLXFK7HN`1kX9dbUHd)sggb%^#OekIO1j6g()l
z@cRK~{uD-;=KV9CfBf^~_UlJ+$?q-iZ`C<>A#6|E8p}JU?1L-*_t*bXj_#JZtaksc
z{=d1F#cAJaJI+@<o0*=rUbJ8H&L*+TD(BXT^>@Gb_%pXg>WJhkt-td^jrMEJyv93G
zzvI#0qldJf<<`7^^m%>$#LSFa)1^eFTjzfIF8%#zP3L~u^C$h+Tnw40d1mUP*wa$g
z=ck?lCFR*ww?cwaK#6$L`7m%Ijw((`POb}`lznzC|5LwquVvw?H$yzxLN6Pw5|Pf5
znreMs`(sAzrqtSJf+90Fb^Fvzd~{l-U$d0QE$30ceh5dLM)5}{TQg-bzDJKg{+!kH
zL&@|@YTUk>N->R%(IGXB+?}nlIefaN4D(I1!<w1<N;Qq9?&+@yUwh-!L}6FeWx+ZM
zk55O%tm`T_*KYs)Zuk1KS!Sg*J}m{w%@=p<t;j7}`BLJ&wb-|fr`<kEwtq^_P3jH_
z=I*SxvnBKLE8pW0zlASrE-PR9&eZI^x9H4gCEEXezTdB}pL_f96Q07~Yq#Hvdj3r_
zcv;GJ`(G~>`<{DVQ(Ubh(Dgqq`2Oai=iy5)?^?LF?&FLbtP6L{c`|Q({zOgFx6)E;
z%uREX{vH3_`!nW`T+%+1@QIpV=V{Np{`%L?Rf*sby!L(hr1N1Xzc0Fz3J*a@G4RxG
z-K71onV=BNO7>k`^t^KE>ElJ4m+piH;2Ktq=TnYee8!y`x2K|T?sd+@$KNtOSyry@
z_paf5d|Ise^P%s8%zt)(N&@|#_dajsEq-=prLf4-KJ&==v8T0a<u`_@TwC(wclq&I
z{Sg+=c6%IIC*in8RY%9{snG8;&$#b!v@Pp+8F)@?=8dAuH%iXLFE7}zWgYkHp!e4C
z!C&5NJ|FfbF8TAmkcpXo+drKT*>k^cU$n!TRK1jAJ(4%ye#~EaNs@clkG=ChD90y=
z_<b(gpZ@gRdavcBnn{kgFMR2F@2Dekefx<iv%c~2cFmq;_S4CFTb7vc{o^%95C0OI
zd91HiJ3Vgw>*uT6GJ`WG-jujM-^(s~>HY4aX_;py_V)e^^<0;`qDG_KYRcywUjNFL
zN{0BJUeT8EE;Hnv&H9p`5qaB8uG=&z1(!|i)X%nzpHTnr^Ze2jy}Gp#4oBX$Mr$T1
zPRl=}TeJ4Zrek?5jy0ZL<(IQ^&c?`zEUi;csqOMv`d-;=?X%yh$D<}hSR5;q{Q0Fv
zfh)=WvRB<<)n&;t8PnG%=H&2Jt~{9d@Qc2Ry_*{UyS8P?IYoW9XInXJT-oX}WqZ((
zFK5~HrtZ3aV~>7l%B;(4@BDuB*duA)@weyg|JO`r-YC&<RP9fjUHVF;%U8ayx$`sh
z<rY2HeZNy4c8YLyR>t{F>@;oU6`dJ$dEY|E!r;qld<^<qcPIMQn9p8(VoDV#FP8oV
zwPmz)jiev{4nCQ_`b%}-Iit0fn>HHm*E+eCcPc0^7RiO5yuZ$*c~z$5Re`g%y{G0r
zpQ<N+`s}o4v)ltxPU#u#{PL7<n|9=hXGaQ^3VYmhj&_{w{kY@CEW!A1HE$!GvsoYc
zh_~)<+_VrJD))7yvqh9`KHWW@BwQdfM?T&D<n={UHaDEU9`@w-v}5;ImuUOR+>x4|
z^eOgXl5kOQ^M)Jqw_0vHab$~TY1Hc(`+n|wKXq4lCMelRecE@YG2+bW8?_2~-L-#{
zbFc6Hes6Z`zb01hYdcR&vEHvW_gZi6#Ll~oUOS`rxEkEQzh_6!ajCt%fhnI@vmWld
zEuGZu^RaqeNnC!H(&d=T%PuKxHr!`FwPsuC-#C+PNHxZDP>rz>Tw|PFkQL>&_WYJL
zPim)FpVyAHn>JhbtZDy(6H{VSoYq)8jgvVLdhXfBQca_y&9C-29GPw$w|wJYlh67k
zAN2yy#-?c1$~)@tBtMjL6v+1f6wsrqS!m0o8Mk@POR3BH^ThklZ+{lzweNSt!<*Gy
z%-Zkt(*?xd+g_U`vj08DEeRGA(PcZ8k1luVWz1c}+gs50Km1kPQrBgcccbn%M##O}
zc-!Hl-MOE2znAi@$$15?I!mIaD=kah<F@kU%=)%NtRZoy9v`^kl$5yFu*=8B?pgiH
zWYtFvAD>ydE&E{`ZnfUrbz<ha%*`9Sd{i%QnRNb|%>C|1NM(yLsEKj3fAy36@L7&m
zxAa8K&x}0zTcdnl_^uiq^XOUGa*h)<cb{PSsmbxUJRnIp%|j;s$d5_SpEH~<_tXl1
zX2I;X?|#Y}!){iuhx%vRUmRsOJ+`u$`}`b1?q!c^51EHdT=}Jz<>yBOL7vuZ`$yje
zxct9I^8IA8ows}4k!2q$dMzRhxA8qa{_n~1aGg83D>lBIA>SaqyYPvkRP^*ucdcew
zO)xB1xiizYy6l;sg+TL!N51!_?tmnhYvl^l)|>XVI`8SfQS|+obbiWvbFo>4@pJm?
zW5Xms$puuW%{Nh67GJZhqUOxwin@>c&i{O`ezc~a*)93j@g|*rel_W4B@<7EYdu-2
zS?1gIUAp<^pDTNw*4Te;&99kvXzdc-z0bX#e_f_wbk;Q2ENtS+l38J&Zl3xZ7qSVQ
zoZeUmdag4qx0_-ut2HxjYD%ywxZnxOSphD1jx6@twAAvMpvcUwE}u8s=j3nGFRlA<
zD0Z>dM9X;P=vP9!Q=T4CP~W#@shs%QeIFid;*Ctvs+4!!x6@p2qkX2|!^TS9x65N~
z{(f#POrHM7*!<gUzMp?I_le9uyJq7{iTCRE4cmSMS^UV!QVh(h|Mp?c2Oq7eJGSe&
zF57u0t1mYBle|yvi=Z8^<5j`+lIrDMib=h3`<AWyAe8+hY5J){tRXrA-hbb@%lJq?
ztx6V|X|dR^?$q6&>4y&HJBz%VFE@YVk2Cd4w`GMHtv3&yxH9FI$?^9ma?ZT>SDn9h
z(tFpPr?)&iaqQSW;i*q<wVh}$TJF3PoR%hm(o$9juOYZ}sR;4hW^x=<`UXxsX&!!D
z>i7iv`#KWYTf6+FV>-`l6jNZER_OQj#u>@k*2#$`wZFbp?PhQJQ8=NRr{K~1<G(vL
zpA(sHSKur*)9ar5zMtmHg7>&R>~}o+_UEkqhi*$N*}qt4+2vC+&0=fPp7($Le5wC^
z`~Isx8)7UQr5z=H&v?FkQP1^mNB5PrUD8n=E*{5bEwtcGX-VA^w^BY<=lb4KO{23<
zul?dX()6Qx<1+nw?q_COEU$m_PW<<m=h+87$tSHY5kFV-JbdTnT`P@*_n(;eKdGu_
z*^hP8qa2RTIx3-aq}*PH!~C&v&Kf0Qmyaj--}QR$nP2ldbxosgx_SQx_4>Mh%B8(~
zpL;#~x=hPxs?TiS*c7d~%ch?DUbJ6)s?OzW=F+oIz3`eAiq=*-ZL~_!><VggpJ&6#
z42=&BS&H_qnJbxtd}mLy&Ncg*KDl>VV1p8Kga<>aVD|r{{cj?YC9|26b5q_3&3Yds
z6kX^$dzyLfs;`?*x}DZ=n8>K3(y-}7{hf7JRLk78(#sVt9|Ie~AR-yz!5}^LM!mIP
zy4>$M#}D4$_`mW^x7>cN_?kO2GA|b`^W9o!b~8hgm8kIpT!RA0Lk%e76v(DR#t*P7
zHPL>*Y-|3W3D@-MR=qr(eg5zL!`Ek}Jl#;{>vh>`iOI_Y%k<0tt1Z2nKQoO1G@@~8
zouT%$CFOB-JKmhWWge&Vx^(~7wfRebzu#ZKxTcLy)@q4m@v|kz<?C(!z6#&(_5a`d
z|MR}xNbWB=#HqgIdR%qxWuF_%|IfM9y7tumJGXC^=sx}%Q~&qt%g^)w*UYjm&wKlJ
zb=X?3`+wh+Up~?)e7ReH-;A<*mCv`Not@QY{p{A(%=xw7Jm22hD$D>Hgt?K^edg2A
zsiHHlKM%XwV9mCaUA82^tl;l4^L-QbcD-1%CFka*FRxawU)CpUy=-y6UDaiu8|wP&
z*PQUlOW!r^W8J;l?{`any<EQh{-0;&Tfg&4nP`;0`x7Uqv;2h5Tq{%Asuv3*HKTVF
zEEHf^aDt&hDY*W?-|zeX+y4DDeZP-w^|v$EUi|;_-2VKv5AsDP6kl@3|8SDBsVI0`
zyl(fqU0c52t4_~8?cOUjb+4O1GpMg`So^U%-sktb-T7AU_k7Np|M|t@{<LV0q=vsQ
z{p+oybDrx&ZPD2McJ20iw{u_D|NmZpnZNEs^PAFtAKLAgnBS{d{B0v>2<X?j@_m!-
zzVAH0bX)H2b^pcMd1F$dPL=%q_kI6-xvCcn-~8WxuPXan<R_t`PbbyWSyz4d%aXtU
zZ&_HDn$L_0X=i3k-22`9e$D1LN|%><PdCZAvEj?_`~U6c*8ltI1Rfdd@_ETv&Qqno
z@5iH@2fr`R|7TMB_jSC#zs*M%vk&)jx8J?g#L9ii{{PSOISMk(w<5g%ZJz&kP1%9k
z$Gzsu;{X4;elx+eO!L>R>-(nJ+DTXzrI;<?eY7>%_qS+x%*1(>&t_V^e?B!l&U5$s
zebu|(Zo9qd`pf(K>sPH@dGcNL!&dQ4cc=dQwtfH7>+$urrSI?A?hdJ*n8|s$^gsh+
z=9d>2Z~jmAKAmp=`{tJ$$^EI<7t7fG{c`#1J%5AjYin}4PlfOQb+zPv?f0wq?uAV3
zbP{|2{pa)f{@d@@Ro}|r|98_`8)#hSAK3eRUiGzmb;l&rm&n;xU5V>A{j_o0-#f+U
z&u-Iua;N(J-mT2<tl#f3mQMNq(!c%`+w;~xkHq(%*e1ntdb<AkZ5Dri9Jlvd8@>J9
zwJZL!Of+}D-*j5fd;jmd@3ZD_PdeJQ<+R@JC0AF6FZb5pYa(0yX5(4wL~|#QPfWee
z86KCJ{P+L<f3Np%dYtg+ugk}u-?s0M{q#5G-oN;t7C#;|UpC+W^X!}asnIEqBxe74
ze>D5ZM$=YT4p6cWNXcTq!!A?c@b>LS@AC&5nX}g)*K^{x|5Nbl_V#=!!z7otZ#T9o
z$QQ70yk@uVcv!~`SIx5cE?q&x9ePJEE%n~)_-)<pce8R+I~wbF3cB?-ru}=f`TV(U
zDmi(s%T#@?YuFX?_=)t(|NGGX@>ceGU-h{qLEE-2311&~GudnJ{EGV@B^P(_NE$79
zqa+-?pzD72K2}bMyI%3^NZxSsN`rOZd5t@Z1kUTq6g)q&F4%BIZ?bw)`LY@T>En$u
zbw3_{m6Jc-bL8flM<0J0iXA;7*6_H`I&ZVKk#6d)V;k)rAMZckcfIoE(&?A}{eC|m
zJjxX>EV6HY!nZd!ZyK8S+kEnvWmj9ZjWy%hnVG&~Gk<=5p1%9$QA0D!&(F@D?mMk%
zvyL6&pi^=M_TTRm-!$Ye{7@$2BiDTUMysZTahgxrsyh-ndXbw{tkWL9TD{({_Sct{
zcXc(&TyJHs-@B=Kdvbx)@$4h-{6EerlyT>okStJr-u8RS_t?^_p*Iax4IkA0d^&ye
z??(&1-gqQ%JbBIc$epW}oXFw3Y#L|XSK9fUt?-W5p1L}Q`iH+ebuB*bOrE}T&J8UQ
zb&-zWQrbovt7CqDd;9vXoa@_3-g<&;5VvZ{7qEkRQ<uZ{eVUr1U{mtq!W)SWg*tVW
zvj3fqg)%E-kJmiett2WZFBmP_XY=XAo5I2k)jVgCdu#n<`UTl19GCiW<Cg$?&+{W6
zmG1w(_kEM;-~E5z=D(IT-n3I}=Gnwu#~v@PvH5x>_~zFeOLfj2nil+{?$hskmCxti
z`u*7U{|k5fQ|F4NiO+oG?X#Jg-^So>$z|WK|LSr=yY@3Obn!4~7`=9t5x%&){Qa9v
zH#aqQoe{H-1xuv3rhV=Da_*1&X&s~fCFS9HGm<&y)qFbnMr8lSWA=VGrT4oF?KaKb
zrc-CK)O)-3`Xe!48mG+Lf3k#?LBoh4KrlOOX?ghGn_p7Ta$Q~}G|g`ArPGsV@6)L~
zvxNzyfPn#&Wf>S4@YQ4t3{a)WgY%F|4>F97Qa=(iI1lnN1Gv^>0M|Bf2OuifhF56^
zw@gu!opGskt=aw4x$R50JTd$%1ajvAvBO_;&E5+IN2GxrmcYGGb;^uCOkgK<^(Zl8
zGC<}!Obko`t6!ihHj|V2o9}ksKT&gU%SnE_d1e3U`sKn_a+CGm|Ni^s+YNq`%KLYl
zzk3~*udmsAI(zZ9+)t*Wk`QBkBBx(XzCZui{7vy;W?bE84t?I{Ki67jww&$f&6!7M
z@GtfLEjQ&~$*1qL_N@O=RQjL!+RMr=9&kXpE<1QywU*CI{F|M{@9i5-_}JB+H(7SQ
z{z_c^+ordh)@o!Q&%OGhwsrmAFP8ItY_o4Io0@EQ<4Lxb5jcdeHJ0D6(_S0<C#356
z({}UQr*EmAUs8U3&59K#bY%CQt&aBlRlD48;?LU;%w9gSvtP0I->((9o7S3n>)+UQ
z@i$le?=yM!-ofoPJJ(zfNdYxE7}hXni`(ssoIc}_QLyL4m3LX!tT?g9=2QG7PT5;a
z)z9CwTr2Z?Vad~z_kG{?|96fu%G&Mr{`CC+e|G+#wX4+1>UQbcIS8Y47an;s{h5YQ
zVP(RLcPq=E|M!fw{;PLeSEnz(GdX(JE`7c0U;g`jn{~8)`O4>-5HE?${HkTtIz9Z{
z?}%5i#VI$xe|`OFw#oA~*Lwe*Jr)-{kr9;S7Muu)Nr|)hF1*tJx$@<v&t)F@F&liX
z_8(uEc_}kK|I@x5I`W?%zPT}d$NCTXCSPq_*+8kJL22{7=#)J9A6KvJefFXxXnEPo
zS#`gcZx!FSe$DSVnVrd28<TfWs$ael;g)H|jG%aGc&2G|J>JghZqZ%U(!AZj9({iO
zXV%}B%fGJP`@YrvosToyH-FQ)R%zSM*Z%0c^?Tlq{2-}<qV;d~dAsrZua28D?{3|5
z-s#sX_x{=RqH|>z4>+i&#HWPqGq8;<f5{rTP(#n?Igjj&XFjtd-dhIx{QCB4-ZH;k
zTDsNm4z5|iBXz-icmB~k?LptysV|c(kDWiwuDD*xw8mrC$5Vx>|G$Nb&z-(b!u*h!
zBsd?fSaTw#=HSiDJ0}uv?mz0fEPearOWFSSqTgF?UUqMa8;^9zr>}Bz|4cV2xuCiA
zdVI~7byt?BZ?EdF{?r(~>3YA?k`v(E@8IgHrmwZJbjgMjdw#z8yE3_*?|*m%+cLem
z*_R@n{e1P$?^ECXe*N{hId6^3KCL-;^IdY`eeGhk^zU~I52wD|Wcd8b%_`yO>S^}z
z+c)q?+bH<Xt5}*0&2gWUf_KagPn=l!>R|Nc&RDDT|Hr~6f1DmW!SC&T^|dq2jeU0{
z`MkQYuy)(;h&$rDm$@2tKKI@8Y|)#oT=BbWe}8+~yZrs?z5m|y+z#G8+3!SIb}-1`
zP>+5&+e;S~I={YSmU?Q+mg92OOV)0`XC+&DB{1{qs!-`Kb_@+loAq3m{n+$t>CBbE
z$NMrL+Amu&d;Km0>$+thEM{wH2WI8X52)()NzCk=%po#e)|IV6DVTLl%ub`yx3^5U
zW?fz7H!*TwP35i3<#R8!a0*{~bhP{O!*=<3zrMfUfAAeMgND)B(3C|TN5W>@5PiEZ
zdc9|}zwOR%`)=gw=o=OGoSC_te?@WG=6klU<diS3FYh<0e&q{F63cF7+}@UJ^{7L6
zQ$2V0nI)c+FYWvNF1ot*+nbx2(__nSCVXdP5Sdx3Yh?UxpWWB@myNZR@@>C6?D}+Q
z;@0TIM>#1QZ{M8KS%1#!%+~3Br}~zj^!9yYX14din$*Iq=ZBK>leo`LQeUoV#2_*=
zEBRm(>&^Y9rlzG`XF3FxgLap_&02q4FJa@=w40k!rJsO<$u2r2Z{M%}mEqsl-DLdx
ze6HyZzn>LlzjANBKT)#o<rL95hwe9Z8c&*gYkzs!qy`J+s5z4?_vgPmT%M+Sf<K8V
zAZ68zM@q&S7ZzMu8NA%8|7O<ewU-vTcAK53Wnge!rWzw6$<TF1P0#3XJHP*NnPQ)}
zUu#~U>=M;BJ5kGPSpV;jN%1ouadC0+>0&jRo~~+k{@z+fe@Y5(ewZ%tUd!lgM2ec<
zoEdx;4;pU0-kf&UYi;!Qd!R7NnYhvQ7$_^8sS=-Qk(YX0w%q6O@&5I;ha=`Js&u}~
z&Jd6yA_<Dt&G%f@W|-whWnEt8d#mhr?q=iJM_*1he*$u!8?18BFoM+}@cIJI0LwLR
zNr>{A%y(zc-Q90DOqphr7N=zdsvx_1lr|qNsGU9SVf++HP<;bdy}9*(Bb37<BFP6*
z>M+r;V~y3l+TUxplzj}Iy|p@fy3OQ!v74KkLB&=;ipWge)~(#~`GWskXFgN_6@Vg=
zu5HcHJ<a~}HWyocJrWPDs{Y-0{$NYdoA#{tyO-Vn_wvW%{`Gr#UjFK?IzQha;eN^O
z1M8>Oygl~r@d71gh6cwaN{dev6wH;WNx$<u==-e2m%l8W?|s_6UZeM2sYjUIr{C@+
z|2?Z}Dqc@IA8Y&VZ{{D(->Iz2cdRiflV=x|WMJS?5(PQp_0tE70&k~BCw+R&&wpEY
z?%lu3w_dNlw#C=mJwN!<&gow)HDYi4e(>i0qfM7?)XIdUFfbT&grtZ_@~xZRoPBJc
z{?}DM=bI$oc)lt2)UmJs0(bqJ@_pNhBVx1q-u8PxFZ*Bk>e9y>YaZ+=`p38R{8{dE
zSO4w{UXSXCG-c}wQ1NP+rm|nBDk-b%KX?C~nQc68=4V-z`YfM+P5*4|kG@Uw%D*0c
zetoCyyxkjR_T-;<x3c#4_kVfsXDMHPz4)HC;`yE*DWD4A&3z5azyd$(j~!p$?bvlb
zf6W>VBhZil+r+LOrQlY+xYCQ2FS`nrH~n8<Rao`v_Uh_cRxej=d9#y!^CzPWhx=;t
z#itjy@%a=l^S$$e&(8GE<eTr3i_hQHTr2gtvE+1S^>Rn`f1BbWJ5S%@zCYcn)Iv|!
zuHM?!Z6YHBC|QZjJbU$^y8I5+uW!nN!lEAiTC>u8so-I@&_8>29Qk^E-k$Qs))lWa
z<4^Y`pIpDc`eE40!yk`Nwb4uax`TfS-`oEarp0a8s<ik7s2K;EV1M@N!G?p(m!5Qn
zUyJ+ydaL+yP?e;7UTV(UsIqGlf-h?qtC|1YV?KYced(JE5C3~zIB;cYvG2D%)6?&7
z4wUH$)-QiuQd0G5j(=)&$~%c4Q?G3QoWEJuh=2ai_bdN%?)TK6`}^aTq%-Z8=KroW
z?|o4i;hO3S$~ll&o~c#z;ilyF<$3CJCxq=Y*!yi8XLygNtko8O<9EC4Zr>`pyHq@7
z>C%<YZNENP`|^n1+|BimD<A9cR+xKhzkif*{xi42f>#stmrvgP=i6T9U%O@JJ?s5b
z@wRrd-(B^uukM&#E%{Lks`g*@a&HSv0p*MZCp3&^uWl|ZsFQgfRB`)kc<c<>*OTJ=
zH(lCS``c>wgsD|=A5*`28wW4%lf8VX-Uptajwx*VDf8y~ZueQXUzb^Jzgzy=H1oqs
z_3Lr*|Nb<8?ebaqTr_?W!_jGTz50<eA#dEH8PhC6KTc9#esyQ<OXKh9yE}Ym{&>FT
zNKEz16>oM-UpCMD@w?Z-|Ltry2b=HzdWZStDQ@>`H#Z%Il>$AEPpYgfzCX0yd?@JC
zjhQdEr|wt2ymMJ_nA@lOotdvH&Ex+bzp}de`WqESP!Sw=6UoVwn!!1$!EuSB3cKYA
zM^IDS^_apXg-OjaA`>9VmTe;2M42a`OwYih#H(a^!Vz5PXc)mHK#3(mAX6Yy;G`p;
z(?mvw0}ul+^+>o#xk$MbO#qim1|23HPZZcKSAbUTG<!6A@D)vHMz#`MQh}@#mE<c5
z0Il+Y*r)(4KX@Uo0TqUDgF*U{4aRU6*lVCt1goDx1q-NXf*S35OoaifZ@}3N<V~<~
zEsz2c5riP;6itAHEyy)sk1jmHzyNa2$`b_;e}ZIS0RWOf3BRC;4<Y^p$((eA1OSGO
z;Aq1Lz^)!he6BcwH*6N1fVvY@KY>bgh%OFLB0!E0YPeJBFeKH0d@!jQlAJ&>0QLaf
zMo=0cI-(ocA`Enwda~!;TUdEJQB$|_%pMj{YJntBaF;I1t-x(2BT@)%6KGLqX!Zap
z0i`Gr$%mYvFgWGNS0vDN#_Gai_kNR-7Z<Fy`_42{eY^3vT;}Vw+x^^SOGBczW?iki
z&+h70!2}9knI{VDy4|8PzotDsHFah7^>r&lSBI5$o%!_i^kqNmw^KlacVhZ+bNKB4
ze7Lf|{{Poq{9QdCSVSZtmPd(c8=ZB!y)E~$pu5b(ztIsXYQD3ke7jqI|JLix>F51+
zzu9!U>Mpyhnw~J*o7s8NKek$Jzgu+c9=H<(>KnR!vC!FD_3zuwx8?lzv}No6eEh1n
zzW0{d?*Au?%lwwl`|-NO>3H%b`QI{g!utMB_y7)>r{LU{!>JRoLE-oIWy_ZNO_VfD
za>=^BF81x$8@4*zZs|mC%bENB@U+;Ju>RV5srplOk%F2!M#bl{=K8(-^m@&nHBz4&
z-x#Ks{f<0&eaEi<j4%Dw>-B&Cw|;zY`mXCTbC>b|jft}R<(yfv-g(m`6&prSIs%2Z
zmQl38QP;E!3mjJlFZU~z+k7=FdZ}x-*xJvvH#T*jLG)oOx_us|zI>4zbo|@#rRB%3
z?*03+*luRkwJpBd$7>9B@5{LI?dG%@fBv<%znymO-M?T&1O`mJIjQ^1hlhtR8y=Sl
zuC~3uX;aeCuF`$$6`!4-KR@l<oSTdE{m;ysd9R}GGvC?k^QtY*&is1sdF1p7T}_j6
zLtlQM>(+fnIsM%<Kl_*BU-SCcnt*z?-+s;7_}+VO)<5;L65n>_Oy5+vw|>ex`=y_B
zjdbtYicinlxn+stlU>gew#HeUaOBJT;oVa%I<q$fvuzo-{`a<&YkD<%Tv?vqQa!)s
zd(?tsU1uf*-cCJ!%BJqy^4E9%!Mnk>Yd?JZ-S%eQt?l*icg(zJ>b~r{dv3C>Ktk@0
ztq(yZ%NEx(Z#|<zpVNKT?`9n9myfUhw$Z}%^RGMUjCwZG+D6^y{%5?p|8)Q5FNyQl
zm)|%2yMMCn`VZTdx9#7)`sSVD@>6|lJB8VJtUgZvTIMM{uR+G7V+uH1Ze|sonQ&`M
z=1Sk$W|_Bhx1T+iBepy&?*4C`1Fh*Rl{P=vBYR0x{ZIL<+G~2M^DnPIoxf=^lgPa?
zaFb#?qDgV}-61s_M#~HVO-Q{WFi(2T2_5Ob53g)LJ@Ka4w4nZYyJv@Qu1n633D~~i
z$ja%-<(rqE`x&y$^;utIyl1WT|7&8B4t&u1qKikt#=K_Y&cXxc-v7eRtlIc~?%X#=
zCt5EPw%(@wGpg{z|C6zGvv-x)n9aTW_Jc|OljE)G3hcZ}kQC2wD7b=YLNh4bKy?!L
zhteG}^*>*foGzVyW#`+@-)c)4)xUe0&b3+wYTT@jn-h2Kvs!F*;H$ffqhG~b)|_ix
zk`QH(zUc!?0>nFfT1F0^Tc4j2U$*?U-d=^<XIE{Qux7^ZfLHgv9x-{m?)BwM>F@j(
zxAA+L$62mB`TXp>s$i?7`RCW{(W(D;@QvYNHhzJ53_M`hf$H3heINGxXxe;v$Hc>%
z>bbd3E}r~<QX8Mu(r>@I`|nNt_2tdAC!b&aJ@9n$_ho&uR!dAjr*Zi+uuX*d$Hc$F
z?P5p$`du&fShYUslh*T|-fOx1Th*H|vFP|DZSUmstG};4Q10*1>~9-c!DIlAX;9^|
z;OF7ZJ0|#mdd1rg9p93F-)`^6FSWgP>py&Y^*DHQa_Tdm-6$Fo4vWvctv=<RlJN0k
zQ-V|CW*S?9`jNI3$4fy$jBrxU93|o7C+1ZC`fu&=zt#QSnQiM#zV6v^X7<<ZC0_UA
zUcL{`6Pb5N803^L9&i&tdi{wb8x8LISt<JJTVzMh$=bQ{`O)Ox-(Jo8SNT7_bvdX6
zd%6HpjT|_pYgAbIsWJM}%D>gi_2y=8GM;i&bLQ--d-3aJX1{F;t2yZ}q+r7+;Q}hy
zL6xn7O{+SCr3NS|@@W|f%mcZ$D1Z+nEzrf&viyU{yat&m%}}?1WD&{bp{5NeF+E)X
zD)SefaH!z&f5>^l(Q*Z(9w*XhP-KOqFg(<>VLagoDiMnUCN{F}fLaWaZBb%Iu}V}D
zWEDeE0H|Fg6Oxj^sE6bb-EL7y4u6nwmKyA!WX|ErhU#t(|A(9oU@w9~0PaVGRS1VL
zwk-d^q5w7qlpYTSS8(}HNdk|S?x@^dej8M~`tY3syW44E;|Gy>hl;x&{xZ`&ykEl@
zEgYjcWTrr}nt}+(g`BQ!&2T?}{J**nI_P*M?d?72fa8M&GN9r|fgK#a=aIUrC=&jB
zc^~=y75_N*a!KF*)y20bAGV*$Hs{0ZkjwvWlw{s-zBH@w{rbK6pcenVx&PHmUsmMo
z+VlSV<yRZuKS%Q(U!D#7eYxL_SN^x$Um|S1ZFO-ps1tkmaGCe%V>K$V7d}rYx!jPS
z^8Kvr#$?b)V%5hx{wd$*UjDlQ)Cns_OCRj!^O~=Jy{{iz`KkQ1$jr#}caz#me^<W!
zTB&S5?^lW4rYE1@OYTXRy8b$W4;~r{Hm%D)Jb7@e@WajGf2O>aFZW+M^m*I*;FNb#
zTf%q!dGh~z^6ZV#H$NVipC8uy(_!v&eaWq{Qn$W7&%8Ck)&h}?K=CoTS^C~V4fSTf
zU!hf*Pt-3x3Y1?Pobs>gmfg!&|Jbio*8Ki<eED{_`X%3ffja7Utz_rsuAEtaZ+`)*
zPtvzqSyVony0W}KZ>9J1StU1T6kj?ezU=zF?}wDjmnky;UOvlD_VUZ+U%iXv_btBn
zdHw#Hl3h=Cd9ONjaJJvyw6}%&M_r$tbBsU9_elSq(&nl=?63Bot4*&l1a-I8%5E2B
zhh!wn6_A`1nagKq{`lK}ZNu2jO`E>V{C)X@v7h(kb$c~-=U?r<9DG0S?&T|<;Y(A`
z+s=5m=iw)l42S!-p1VEV`R)?;CZhmd*Jo4o_Mf}w{}I$^xU^<-nQwaO@65N!@mAj-
z-;6vZWfkdQqtEx}IV>GPbIs~I2Y2W1QR=lVkNEaKGC1E$xs5mL!$$G=f-))dqKI$#
zTSaCb^8P3`(?U$GOnSEEoBP@4qs$Ty?7LZ8sJ7Wc_w(uJwKi*IJ|8rBI3<7iLG^!4
zh(r#K)-`49@wGKsR>=wfbGUtCaCDuo+x^?B{{HO0y{{I1HtSXrp1%D2&lIb-E%B-Q
zMT-;PH*eO}{|V`=uF0J3_oBi_>gwkUTYmg`YXQmhu-tk+tL(!;>vi6s^7HG<o4R|y
zeBZtKwpxs#TIq`mc4v3I{*`lm(~aJo8xI)2&Wm0P%90bBLFE~^kU6h=<Y4CZn7LJ@
z?`MVWH!;3jbG7y5oQ2G<WrQ#9tNp#^iiQz1d%|PQlyBXP-xsRckJ}g5?|OeN`_hA8
z|8;xwe}3lILRY)<rEJgLV{CKoXz%^<;%=m4XRzk;bhU1y;-u;4GEW_`Pgi|*%iA3^
zMsaIv547Z9henh6-ez$3b@Oe{mo0jxFaK=z_vij^xb)k;^yT-b{q6I4KFjZBURw9O
z8LLhdKnhAw4G%6y!G%4dw9`ERE_FfPUZBLx;r|fSsRHp7*e!Roip~s=uQdhD%C9fJ
zo!Blr?b+Gc>3!e=7OqWU)#ej2MJE)$cAYuU$h`TsA~^dh!0cIZN@?@gYthS9y{BE-
zsk<(J_uFmHIv`mS6w)AL7Fl(l5#RTrwd8j0_NzPJuG{_2EB@D|>1PB{9qdvnHd8up
zhhpybyJat*+yAe8b!n;hucP{P6Yaij>|Yw4xAUlsAgB%l`3}^BJmDCUK9O@?^}C%W
zSyxt=BphfkDSLC{%O!99nbP3G99)HfRCc`5Gdg%XfB#(Y9*h60!`GXcgR2IZ6N?0{
zMyJG8zuj8$WTN{r(2|Cgp{u{&b9#Mk?PcYDn<Ok@^5@g^{b%}mL1Wsv6D7^_Vs4e+
zuRV*!Pj9z_S{TQq-|ubwzyHLMuesZ99>e01%*)GscYQo2{aR$Eh3x0J>bI#_EWK0r
z`|Zt_)A{ZHd{}d0OYUtm>%X7YZoij>MJWqtqS<f%9njvIq9-Rz%HPElJA-O*L<$Jp
z>-y}h`TaG2|Ie{3z69E+5?A*#6+>06Yud#{t}l<<|0}$ewR-KQ*N>0)`-7(5FWdco
zGr8<WVtdK^z29dkqldYcQK62{pSm-LTDdpBp8xkv`sEph$(#PiSHInw`Ty^Cb9eM8
z2u(>^?H!ktG=0<e{r~@NU7pw{fAY@>cRS4BUwtBH@7HUYH#eo;{D0p5{~s&+hM%ui
zuTM+YYeq|ZtGdoOvdc~podMdgVy&0<;=;nM-+AS1EdD-Gw>QM%gtW7>rrLd;c|L{x
z_wkPgKd;TaVYK;ipY^iia@A)rqAh%4<(G@@muHz~U(#N`C+PXN(|WsgbR#z%xdU!(
zA*Iy#i9gTg*G>L*r}%uyl|c8U+UxghidzV7R7?h!N#LCFs>^3X+1p#7b#ZTI%y?h>
zzWQbK{a<0P?(Qz%^?Y9SrQ7-Yb1~w2{RxrHB`+_PfR?s^mJzM5d8&<(2dlb${`C0>
z96tuyXcJrW@#xKu#^`QLohWIN5%6n!{qOB>{<q6jWo)ayS@-+xbgPmV34NHQ!kbN}
z^<LggpYID!X-8$2$M(q|0j1E@NsGR`xVZRS{vDR!Wj>zOZ#KGrTRg9DQ&Z^bu&Jf<
zm6UB-)nVm_Oh`(U$Hbp6-0f%GtNDBul!LdtUbj1KyKdYbi?<sN^UW?v!aa+$>3ijm
zhwaP%{rzoTiD_=YM9W_<7GFNdE<eZa>q`Hno6lKk@7CpV$T@ps3Mli)79J72xxdvl
z?a~s@oBd_a&djs5zP;+omD;YJ4<hs6WkbNkOh?zWudlAYe6@Q0IWLEEH`C|O%@w>k
z1^b9XDP%-p&Bfm$k`Fa)K=BGr2Md%gH+A`J4US29^n_)uRq3kFGc2~HUtHwcdvEy(
zl#*F=rdUYIqa8CIozC0+cG|vQS3wE8JJ={Zc%r0TO@-C78Og75OCvWXxi0>0dgRZ~
z&zH~4G>-nVRJAp|X5KWBdAfIP;YHt$m->eHawoc`-Pn-$vVob;1GHlDlJR+)$!h-d
z=J45kIB;cq{{1y4judKLg>>Kd-k%e>Ic@2+wbAC4^L34`F-3A8afF$d3knN0zE9`-
z-A_jDC`kNuCf#oG+_O2m-)>vIN=f+m?l(Ig_ho|iTBWOX2b-8<iS2-iH!Gjdy<GeK
zZus+UJLdiOc;;^RY2qp^qk~&AF9*#w%e}PicHZn<!>-GE(~B`9)nnq$H=9mxy5IBj
zu>8LXY<w~^`mEpWc=O9uJa)?6|GoJ&hv%Tz65$g$pPiW*Tzzc4qI27l<@2gmecN|j
zzWxq|!uW|Z&GX}`UMy_C#LRCK@SF*)h6)v%InTCw8E9^__N8~cSM|G{>2tZ!im}kh
zlt*h)3TGvu<ruTsN}E9o?v6v6fQVvv<4uv7f83QH=cL}<UB12;y-d*RK2yH$^V}`}
z|NUNWSNm&8-QQnVcY-=eNOgJOR9CgS51+nXk2n9?3hLLv6R*H*|B060ZX{pc`FvjX
zD{fHV37#Sq%;qZ<KWJoMwszYst=dPz^F2Vz-F`h1-yczp#T7l0#!`FGEe>@}`}E|b
zZydP!2dZmNfYZu~RZ5#*uiI@_iEhrSE*{iu0qHNH<P)@Z%8C;X69wlTLKKr)M$q0X
z!k1tHg!Q2DedN3c>i45X2e_Mthy>7p{Gs3qa889ZZlFD8gop;H3w@bs4!pS5GHO6B
zC*U1UG-sg{C7@0xiaXH)E+7R>05k}MR9u1@daiC2T>h{eX$czdMk~o(*#wZo3pD84
zvit+WTF}rpYSM;u!x1Ti9o$4lSORNuBSauQ@2G}iXloLr0^L@$$cJ>I#U8-JCtxD=
zgCk=Bw2y^|TP%J-FX02xgB=_Xh*}C1>?mb4tWSWj1>A{8gwMc*2ez0+N<onE7?h~i
zFhWWCu5OUg4&)rCh7nVsQ4P2$3zQb47i)A34^|7L<^+nn6xc;)o}D3QTeW1NbNjWO
zxu>RRX7<~D^N7yhTWa-c#o|k=LRX`fM<Ac%RE4dJnYpX%?X0M+S)t2M=)~`{dAsGZ
zU*@3}&diUGj=ubGnBV{J@9*I$=-F!X%S^n>Q3A2(h}@9i_$}Yp*LSXxang~FE9+u+
z-+H|<Ix=T#<hGod=&dD@nXeY?IKd-jqEY(l%F1)e+iqp8-jZ=~5nFunzdt`SpPiYh
z`|HxAL&kRg-dyLWy_187%$+ab1t6huDQ*1n^ZG2G&8Uh_30o79*xj?$;TlsRJRe7=
zh|GMYV<fEZ=L25C`xV+8$p3iR6TRRGjY)YFF+;Zej$yC7{l0H&QVYA%FpH<q=#-|+
z%gdIU<=$G7yZx@2_2$SqlOkzVsAn>s1J^;IVg^grNA0GNU5ugBM;P@0YML6Y<3K}x
zkZKh?4?bGQA=Sp<ss^Qc<^c__!!s_Z?m^8{89&ydm$q0EI?d`h?gFI72WXlJ(ME#J
zGay8UPOE9gB=q(RMxBS2Zox4}Mk9gr07Pz?Vz%zEwzE(Qs0j0_FE1vx@yTX6p)_Me
zC0&>OiriV0`s&u!?5wM+rlM9iPZlU`uDq}<_jbt<LHCsZTBz*=J}sl!tlVNcC;qKn
zxe~2rlL<*N(?6wa^!fRDf8+FXITxo-6Pfq1GAgKITgi9OoH00sJXp5>NJaR>%tp}a
z3x)>p3M>W&P#1xL0lcsabi55{IS1<Uc87_KW=5;S*ZaNS_j?^DBOeoJ^?hmO->=t$
zSBI^A6=V17V*j$keAYU;QCl+R#+%QRUSIn2>GbP6Yv0`1_%hx8cjV;r@w>}%e|?(1
ze-5ask9~h>srTIPKfbQ7-}`MNY|!FX)$6sF1Knk(hOLbXegE~^n#kby#mAoBzW=Z6
zZS`6G_tp1*Ujl7YIcNL*PSyI$i{1OH)F;m{%?|tZ%zXb$yH6AQQ{KP6=q|4+#y<P*
z!HWB9qqqCr&fA>}>cD5+4=?!ja(Vjq+|6grUb5T&a0DIP&=K0?%xAgeiA~4pCRT2<
zANSwi+M50AL%aQ)KD%EpR?Sq~G_iU1?&eK_6B`{<<}yCMv$OazBfHFmTif&Z-)lax
zzV7Slm%ZlqR#XVgWRI5fu>bRr|MJ}Ody(%8PfyqPw|=`Nc$Pt;Q`DY{LaRR?4hR4L
z_gA`R@ALWf`wo7Wx~wFB!0yut<;-^3GLQ1RrQv1Kd-@|7jy&cPzh;=w8?g7qqV7wx
z^Y_hceSNR`y{`_R+3Mzxug}c?c_w{RW5S=_W<D#8Xa{!CHYQLPa)$1L>AKO=DtG<=
z_gi>1r`_L|{>x8JR?ok(Uy}9j!*=;|&+@nODy1s3o4d28E>>c0K2g<tBH_x4z|8%B
zzpdUY*SPU|T=i1W;3;^Y9_SRGvg$WgpLgH?tGBy$``t2c=Qf^|!gk>?g|1c)TZE-#
zT+F`g+NidyK=<l$yRR!JWv|Avx`Z|LTz}n{#U@EdI^O)xy}ixXU8d0GTmHjCzotd!
zd9usb7)-R-aZpxgp4rDZZ^8E;o8|v3h<&6b<6`&W05f<j&P@ODK2hzkCC2A%jNk72
z{qCi+{I7s*M{{3YS$Q)#{mhJs`#$#Or+j{pEdM(qY+CX@^JmF>(%xO}HNQ6_Z{N>n
zUzY0Et}J`~cu(bLuYLX1@AsO^Ry=5Y$zT7&dD~HauDYC;H`3?VUMjntJH6`Lo105c
zFSVPU^CZE$Fu+X9$e?45tjjTRn`?1LW^KMPRjmTF#b-hHgKpE?g}*1d%gwx7{`G2j
zdiJX6da<+S)qcDA<)nX|(b<{C%gt&Yecku}@B8a)IS&;!ZM?1<BQ5vd@Q`|wwwY}G
z_ucn>|NnVzzxlZ9gss`v*PV+xP!YGLHuAdT6VRafTP>0IZ~y&%zn=HNf*<8w=PVxg
z<b*GB=C?KB{sA7M-1&T-GB}CP5s#}-)Q;S^Y`2K{odRdG0}fA{Pt0Ji|NA=rGH4X|
zrCjrY8&(q0cayHKlsNOW`9wty&#XhU{pMPwG9Nrv!*giviAl@n*IE5Nz5kE*wqq90
zDv$TcZhoWj)$UP#?$)beFZX;tcY0gS5viv)LZ59sE_a%3^2Xa?JPy*uJAas_DzisV
zf@Y1qkA;&GroHbgKFBJbvitNCmj%Zb=rN@}I?{P_)+-5_XW7MEht+@lzW;w-?mJyM
z7qf4>TANQqNQ-38d32xS*}J>Dm)m~7V=TRZPs&7t`-78GLHG4{=3DsRflot{zXKYc
zez|%6-<*?MZI5!lzPGn}Ie*Oq=9_u0AMfodeVumMl-+zr^Yxfw-&=*pWjE{E{r_>?
z{*~|PhzZ9Tl<&V~pUSIb|FP45O3j<q>-Wt9HOFsl$qcsc0BzTctFGr+k#x4~!i4_T
z<`WfbC7wJ#a_eQpV>#x7kG>1mul`Uq->&vj;c?mUYTJMBocV39tT^3UYj*R*!D$Z`
z<b<Ro<W^KYpId%u{=YBFb3T`@IjYICHb_x8yQJyH7unxV#~-}4|Nq&3v+v2)<`WBw
zpPzdPIvU5gBj-(NcfY>|-(HS85#FMSGVI?)*YoP%$%%a_HUF4KTm-*_fr53zhJr)^
zcI9JD(lU1h_}He$Rb}3)d_H&c%c;WIyZ3)+)lbPz-k8^X!eXbh-<x^D=Z-wk_;$R%
z`Rd|5&zN`L_`a`TL3qX9DIkAM_On`fuQ23e-7^3Aaqnx7WFKGWYWerq*VA1ketVWz
z_$BL!?_UN=_a&fof8!AQM9bxK%c8_OHgZ~mwrkEcHRlzaJ@YY->Bk?B98><xz0v-C
z&E|8fWI=J+%d3>!D3P5SSNU{m&e`BM>t_qk-kH4i^s%Uj;|q2d1ed<?OG!L?koRT6
zzGcR8^6cB$Ha;_AliGNj?OLtCOn#x}i;LaU@4u}nUtuA(>W*WIraU+M#KUd8s~^Xn
zIj?c-q1(sb8g*~m*e3SwjQKNb)5K=oi;g!}oH(GcsZ2K@XWzMg|BkKaHb$S#-F|nP
zZcEbt%MvS&K28ewae4l~EoHnFZ>$PF9u>bX?eZ+!E2+5L4ixsy5igEDd+_n`@#W|1
z|9!s6U|43_WgPi5`&ZIKiR{&n41Y>4PO^X0yLCg~e#!X{`|JNCSG``l{Zgm;ycx3Y
zjC^Lxn>SYb<t*)T^qjKWVuzVM^OZ@e-kVI<|Np)JzxT2<8JA--`ybs{_2JP;_4#M6
zwY4+*@+wJx+E^g)^GdLP>Tdsgg~w%Q`WMzX*}aIr?wFFSm*1tNEN!{NP-aSR7Y~n;
z_dLU_TU#=3N<F{v-gL*;Ytfq<Q)RYDW|u#j((brsZC~@xM?9*6U4>zjIo1X>pRkyx
za4sUK;@FC|IP;F{y{0Z3_rC_s9-ci~7yxqpW0U%Sf2`h{#wjydotr0|onm(FkX488
z$B*|uf4;K+%O&q4j}kUpOVrzYMbsRd|L2MN%>{}UKc7tYes@l=*?rA#1*2!4G7+-(
zUrR{DmftPCxu^eNP;&E$wi+E77q<GJr{lBgt6$jvdu*SzHv-u^8^PYe*{FbT;{)##
z12sfIO2KUt&<05O_B!wmHU`ihCkF6tWCq+yA&nPgr4Y^F%}lUX4QTx?19(pzXaPFN
gPKX1rDAjHG&u{a#R>u1M#F-$4p00i_>zopr0KR#-)c^nh

diff --git a/tensorflow/compiler/xla/g3doc/index.md b/tensorflow/compiler/xla/g3doc/index.md
index 6d2bd3dba72..7bbcd4309c7 100644
--- a/tensorflow/compiler/xla/g3doc/index.md
+++ b/tensorflow/compiler/xla/g3doc/index.md
@@ -4,9 +4,10 @@ XLA (Accelerated Linear Algebra) is a domain-specific compiler for linear
 algebra that can accelerate TensorFlow models with potentially no source code
 changes.
 
-The results are improvements in speed and memory usage: most internal benchmarks
-run ~1.15x faster after XLA is enabled. The dataset below is evaluated on a
-single NVidia V100 GPU:
+The results are improvements in speed and memory usage: e.g. in a BERT
+[MLPerf](https://blog.tensorflow.org/2020/07/tensorflow-2-mlperf-submissions.html)
+submission on 8 Volta V100 GPUs, using XLA achieved a ~7x performance
+improvement and a ~5x batch size improvement:
 
 <div style="width:90%; margin:auto; margin-bottom:10px; margin-top:20px;">
 <img style="width:90%" src="./images/tf_xla_performance.png">

From 47defbfd184f7fb7f404c6c9ba3d7765a9134343 Mon Sep 17 00:00:00 2001
From: Geeta Chavan <geetac@google.com>
Date: Mon, 14 Dec 2020 12:21:54 -0800
Subject: [PATCH 54/60]    Merge release notes to master

PiperOrigin-RevId: 347442384
Change-Id: Ic1b19ab98a80ba23f58b8ea79d492b5abf7bc5ca
---
 RELEASE.md | 839 ++++++++++++++++++++++++++---------------------------
 1 file changed, 414 insertions(+), 425 deletions(-)

diff --git a/RELEASE.md b/RELEASE.md
index 446735f1b0a..e34dedd12a3 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -111,459 +111,448 @@ This release contains contributions from many people at Google, as well as:
 
 # Release 2.4.0
 
-<INSERT SMALL BLURB ABOUT RELEASE FOCUS AREA AND POTENTIAL TOOLCHAIN CHANGES>
+ ## Major Features and Improvements
+
+* `tf.distribute` introduces experimental support for asynchronous training of
+  models via the
+  [`tf.distribute.experimental.ParameterServerStrategy`](https://www.tensorflow.org/api_docs/python/tf/distribute/experimental/ParameterServerStrategy)
+  API. Please see the [tutorial](https://www.tensorflow.org/tutorials/distribute/parameter_server_training)
+  to learn more.
+
+* [`MultiWorkerMirroredStrategy`](https://www.tensorflow.org/api_docs/python/tf/distribute/MultiWorkerMirroredStrategy)
+  is now a stable API and is no longer considered experimental. Some of the
+  major improvements involve handling peer failure and many bug fixes. Please
+  check out the detailed tutorial on
+  [Multi-worker training with Keras](https://www.tensorflow.org/tutorials/distribute/multi_worker_with_keras).
+
+* Introduces experimental support for a new module named
+  [`tf.experimental.numpy`](https://www.tensorflow.org/api_docs/python/tf/experimental/numpy) which is a
+  NumPy-compatible API for writing TF programs. See the
+  [detailed guide](https://www.tensorflow.org/guide/tf_numpy) to learn more. Additional details below.
+
+* Adds Support for
+  [TensorFloat-32](https://blogs.nvidia.com/blog/2020/05/14/tensorfloat-32-precision-format/)
+  on Ampere based GPUs. TensorFloat-32, or TF32 for short, is a math mode for
+  NVIDIA Ampere based GPUs and is enabled by default.
+
+* A major refactoring of the internals of the Keras Functional API has been
+  completed, that should improve the reliability, stability, and performance of
+  constructing Functional models.
+
+* Keras mixed precision API
+  [`tf.keras.mixed_precision`](https://www.tensorflow.org/api_docs/python/tf/keras/mixed_precision?version=nightly)
+  is no longer experimental and allows the use of 16-bit floating point formats
+  during training, improving performance by up to 3x on GPUs and 60% on TPUs.
+  Please see below for additional details.
+
+* TensorFlow Profiler now supports profiling `MultiWorkerMirroredStrategy` and
+  tracing multiple workers using the
+  [sampling mode API](https://www.tensorflow.org/guide/profiler#profiling_apis).
+
+* TFLite Profiler for Android is available. See the detailed
+  [guide](https://www.tensorflow.org/lite/performance/measurement#trace_tensorflow_lite_internals_in_android)
+  to learn more.
+
+* TensorFlow pip packages are now built with CUDA11 and cuDNN 8.0.2.
 
 ## Breaking Changes
 
-* <DOCUMENT BREAKING CHANGES HERE>
-* <THIS SECTION SHOULD CONTAIN API, ABI AND BEHAVIORAL BREAKING CHANGES>
-* Certain float32 ops run in lower precsion on Ampere based GPUs, including 
-  matmuls and convolutions, due to the use of
-  [TensorFloat-32](https://blogs.nvidia.com/blog/2020/05/14/tensorfloat-32-precision-format/).
+* TF Core:
+  * Certain float32 ops run in lower precision on Ampere based GPUs, including
+  matmuls and convolutions, due to the use of
+  [TensorFloat-32](https://blogs.nvidia.com/blog/2020/05/14/tensorfloat-32-precision-format/).
   Specifically, inputs to such ops are rounded from 23 bits of precision to 10
-  bits of precision. This is unlikely to cause issues in practice for deep
-  learning models. In some cases, TensorFloat-32 is also used for complex64 ops.
-  TensorFloat-32 can be disabled by running
-  `config.experimental.enable_tensor_float_32_execution(False)`. The "Major
-  Features and Improvements" section has more details.
-* The byte layout for string tensors across the C-API has been updated to match
+  bits of precision. This is unlikely to cause issues in practice for deep learning
+  models. In some cases, TensorFloat-32 is also used for complex64 ops.
+  TensorFloat-32 can be disabled by running `tf.config.experimental.enable_tensor_float_32_execution(False)`.
+  * The byte layout for string tensors across the C-API has been updated to match
   TF Core/C++; i.e., a contiguous array of `tensorflow::tstring`/`TF_TString`s.
-* C-API functions `TF_StringDecode`, `TF_StringEncode`, and
-  `TF_StringEncodedSize` are no longer relevant and have been removed; see
-  core/platform/ctstring.h for string access/modification in C.
-* Removed `tf.distribute.Strategy.experimental_run_v2` method, which was deprecated in TF 2.2.
-* `tensorflow.python`, `tensorflow.core` and `tensorflow.compiler` modules are
-    now hidden. These modules are not part of TensorFlow public API.
-* A major refactoring of the internals of the Keras Functional API may affect code that is relying on certain internal details:
-    * Code that uses `isinstance(x, tf.Tensor)` instead of `tf.is_tensor` when checking Keras symbolic inputs/outputs should switch to using `tf.is_tensor`.
-    * Code that is overly dependent on the exact names attached to symbolic tensors (e.g. assumes there will be ":0" at the end of the inputs, treats names as unique identifiers instead of using `tensor.ref()`, etc.)
-    * Code that uses `get_concrete_function` to trace Keras symbolic inputs directly should switch to building matching `tf.TensorSpec`s directly and tracing the `TensorSpec` objects.
-    * Code that relies on the exact number and names of the op layers that TensorFlow operations were converted into. These may have changed.
-    * Code that uses `tf.map_fn`/`tf.cond`/`tf.while_loop`/control flow as op layers and happens to work before TF 2.4. These will explicitly be unsupported now. Converting these ops to Functional API op layers was unreliable before TF 2.4, and prone to erroring incomprehensibly or being silently buggy.
-    * Code that directly asserts on a Keras symbolic value in cases where ops like `tf.rank` used to return a static or symbolic value depending on if the input had a fully static shape or not. Now these ops always return symbolic values.
-    * Code already susceptible to leaking tensors outside of graphs becomes slightly more likely to do so now.
-    * Code that tries directly getting gradients with respect to symbolic Keras inputs/outputs. Use GradientTape on the actual Tensors passed to the already-constructed model instead.
-    * Code that requires very tricky shape manipulation via converted op layers in order to work, where the Keras symbolic shape inference proves insufficient.
-    * Code that tries manually walking a `tf.keras.Model` layer by layer and assumes layers only ever have one positional argument. This assumption doesn't hold true before TF 2.4 either, but is more likely to cause issues know.
-    * Code that manually enters `keras.backend.get_graph()` before building a functional model. This is no longer needed.
-* Start enforcing input shape assumptions when calling Functional API Keras
+  * C-API functions `TF_StringDecode`, `TF_StringEncode`, and `TF_StringEncodedSize`
+  are no longer relevant and have been removed; see `core/platform/ctstring.h` for
+  string access/modification in C.
+  * `tensorflow.python`, `tensorflow.core` and `tensorflow.compiler` modules are
+  now hidden. These modules are not part of TensorFlow public API.
+  * `tf.raw_ops.Max` and `tf.raw_ops.Min` no longer accept inputs of type
+  `tf.complex64` or `tf.complex128`, because the behavior of these ops is not
+  well defined for complex types.
+  * XLA:CPU and XLA:GPU devices are no longer registered by default. Use
+  `TF_XLA_FLAGS=--tf_xla_enable_xla_devices` if you really need them, but this
+  flag will eventually be removed in subsequent releases.
+
+* `tf.keras`:
+  * The `steps_per_execution` argument in `model.compile()` is no longer experimental;
+  if you were passing `experimental_steps_per_execution`, rename it to
+  `steps_per_execution` in your code. This argument controls the number of batches
+  to run during each `tf.function` call when calling `model.fit()`. Running multiple
+  batches inside a single `tf.function` call can greatly improve performance on
+  TPUs or small models with a large Python overhead.
+  * A **major refactoring** of the internals of the Keras Functional API may affect code that
+  is relying on certain internal details:
+    * Code that uses `isinstance(x, tf.Tensor)` instead of `tf.is_tensor` when
+  checking Keras symbolic inputs/outputs should switch to using `tf.is_tensor`.
+    * Code that is overly dependent on the exact names attached to symbolic tensors
+  (e.g. assumes there will be ":0" at the end of the inputs, treats names as
+  unique identifiers instead of using `tensor.ref()`, etc.) may break.
+    * Code that uses full path for `get_concrete_function` to trace Keras symbolic
+  inputs directly should switch to building matching `tf.TensorSpec`s directly and
+  tracing the `TensorSpec` objects.
+    * Code that relies on the exact number and names of the op layers that TensorFlow
+  operations were converted into. These may have changed.
+    * Code that uses `tf.map_fn`/`tf.cond`/`tf.while_loop`/control flow as op layers
+  and  happens to work before TF 2.4. These will explicitly be unsupported now.
+  Converting these ops to Functional API op layers was unreliable before TF 2.4,
+  and prone to erroring incomprehensibly  or being silently buggy.
+    * Code that directly asserts on a Keras symbolic value in cases where ops
+  like `tf.rank` used to  return a static or symbolic value depending on if the
+  input had a fully static shape or not. Now these ops always return symbolic values.
+    * Code already susceptible to leaking tensors outside of graphs becomes slightly
+  more likely to do so now.
+    * Code that tries directly getting gradients with respect to symbolic Keras
+  inputs/outputs. Use `GradientTape` on the actual Tensors passed to the already-constructed
+  model instead.
+    * Code that requires very tricky shape manipulation via converted op layers
+  in order to work, where the Keras symbolic shape inference proves insufficient.
+    * Code that tries manually walking a `tf.keras.Model` layer by layer and assumes
+  layers only ever have one positional argument. This assumption doesn't hold
+  true before TF 2.4 either, but is more likely to cause issues now.
+    * Code that manually enters `keras.backend.get_graph()` before building a
+  functional model. This is no longer needed.
+    * Start enforcing input shape assumptions when calling Functional API Keras
   models. This may potentially break some users, in case there is a mismatch
   between the shape used when creating `Input` objects in a Functional model,
   and the shape of the data passed to that model. You can fix this mismatch by
-  either calling the model with correctly-shaped data, or by relaxing `Input`
-  shape assumptions (note that you can pass shapes with `None` entries for axes
-  that are meant to be dynamic). You can also disable the input checking
-  entirely by setting `model.input_spec = None`.
-* TF pip packages now use CUDA11 and cuDNN 8.0.2.
-* XLA:CPU and XLA:GPU devices are no longer registered by default. Use
-  `TF_XLA_FLAGS=--tf_xla_enable_xla_devices` if you really need them (to be
-  removed).
-* `tf.raw_ops.Max` and `tf.raw_ops.Min` no longer accept inputs of type
-  `tf.complex64` or `tf.complex128`, because the behavior of these ops is not
-  well defined for complex types.
-* `tf.data.experimental.service.DispatchServer` now takes a config tuple
+  either calling the model with correctly-shaped data, or by relaxing `Input` shape
+  assumptions (note that you can pass shapes with `None` entries for axes that
+  are meant to be dynamic). You can also disable the input checking entirely by
+  setting `model.input_spec = None`.
+  * Several changes have been made to `tf.keras.mixed_precision.experimental`.
+  Note that it is now recommended to use the non-experimental
+  `tf.keras.mixed_precision` API.
+   * `AutoCastVariable.dtype` now refers to the actual variable dtype, not the
+  dtype it will be casted to.
+   * When mixed precision is enabled, `tf.keras.layers.Embedding` now outputs a
+  float16 or bfloat16 tensor instead of a float32 tensor.
+   * The property `tf.keras.mixed_precision.experimental.LossScaleOptimizer.loss_scale`
+  is now a tensor, not a `LossScale` object. This means to get a loss scale
+  of a `LossScaleOptimizer` as a tensor, you must now call `opt.loss_scale` instead of `opt.loss_scale()`.
+   * The property `should_cast_variables` has been removed from `tf.keras.mixed_precision.experimental.Policy`
+   * When passing a `tf.mixed_precision.experimental.DynamicLossScale` to `tf.keras.mixed_precision.experimental.LossScaleOptimizer`,
+  the `DynamicLossScale`'s multiplier must be 2.
+   * When passing a `tf.mixed_precision.experimental.DynamicLossScale` to
+  `tf.keras.mixed_precision.experimental.LossScaleOptimizer`, the weights of
+  the `DynamicLossScale` are copied into the `LossScaleOptimizer` instead of being reused.
+  This means modifying the weights of the `DynamicLossScale` will no longer affect the weights of the LossScaleOptimizer, and vice versa.
+   * The global policy can no longer be set to a non-floating point policy in `tf.keras.mixed_precision.experimental.set_policy`
+   * In `Layer.call`, `AutoCastVariable`s will no longer be casted within
+  `MirroredStrategy.run` or `ReplicaContext.merge_call`. This is because a thread local
+  variable is used to determine whether `AutoCastVariable`s are casted, and those
+  two functions run with a different thread. Note this only applies if one of
+  these two functions is called within `Layer.call`; if one of those two functions calls `Layer.call`, `AutoCastVariable`s will still be casted.
+
+* `tf.data`:
+  * `tf.data.experimental.service.DispatchServer` now takes a config tuple
   instead of individual arguments. Usages should be updated to
   `tf.data.experimental.service.DispatchServer(dispatcher_config)`.
-* `tf.data.experimental.service.WorkerServer` now takes a config tuple
-  instead of individual arguments. Usages should be updated to
-  `tf.data.experimental.service.WorkerServer(worker_config)`.
-* `tf.quantization.quantize_and_dequantize_v2` has been introduced, which
-  updates the gradient definition for quantization which is outside the range
-  to be 0. To simulate the V1 the behavior of
-  tf.quantization.quantize_and_dequantize(...) use
-  tf.grad_pass_through(tf.quantization.quantize_and_dequantize_v2)(...).
-* `tf.distribute.Strategy.experimental_make_numpy_dataset` is removed. Please
-  use `tf.data.Dataset.from_tensor_slices` instead.
-* `experimental_hints` in `tf.distribute.StrategyExtended.reduce_to`,
-  `tf.distribute.StrategyExtended.batch_reduce_to`,
-  `tf.distribute.ReplicaContext.all_reduce` are renamed to `options`.
-  `tf.distribute.experimental.CollectiveHints` is renamed
-  `tf.distribute.experimental.CommunicationOptions`.
-  `tf.distribute.experimental.CollectiveCommunication` is renamed
-  `tf.distribute.experimental.CommunicationImplementation`.
-* `tf.keras.mixed_precision.experimental`:
-  * `AutoCastVariable.dtype` now refers to the actual variable dtype, not the
-    dtype it will be casted to.
-  * When mixed precision is enabled, `tf.keras.layers.Embedding` now outputs a
-    float16 or bfloat16 tensor instead of a float32 tensor.
-  * The property
-    `tf.keras.mixed_precision.experimental.LossScaleOptimizer.loss_scale` is now
-    a tensor, not a `LossScale` object. This means to get a loss scale of a
-    `LossScaleOptimizer` as a tensor, you must now call `opt.loss_scale` instead
-    of `opt.loss_scale()`.
-  * The property `should_cast_variables` has been removed from
-    `tf.keras.mixed_precision.experimental.Policy`
-  * When passing a `tf.mixed_precision.experimental.DynamicLossScale` to
-    `tf.keras.mixed_precision.experimental.LossScaleOptimizer`, the
-    `DynamicLossScale`'s multiplier must be 2.
-  * When passing a `tf.mixed_precision.experimental.DynamicLossScale` to
-    `tf.keras.mixed_precision.experimental.LossScaleOptimizer`, the weights of
-    the `DynanmicLossScale` are copied into the `LossScaleOptimizer` instead of
-    being reused. This means modifying the weights of the `DynamicLossScale`
-    will no longer affect the weights of the LossScaleOptimizer, and vice versa.
-  * The global policy can no longer be set to a non-floating point policy in
-    `tf.keras.mixed_precision.experimental.set_policy`
-  * In `Layer.call`, `AutoCastVariable`s will no longer be casted within
-    `MirroredStrategy.run` or `ReplicaContext.merge_call`. This is because a
-    thread local variable is used to determine whether `AutoCastVariable`s are
-    casted, and those two functions run with a different thread. Note this only
-    applies if one of these two functions is called within `Layer.call`; if one
-    of those two functions calls `Layer.call`, `AutoCastVariable`s will still be
-    casted.
-
-## Known Caveats
-
-* <CAVEATS REGARDING THE RELEASE (BUT NOT BREAKING CHANGES). E.G. ADDING A NEW DEPENDENCY, BUMPING A DEPENDENCY NUMBER, LACK OF SUPPORT ON SOME PLATFORM, ETC>
-
-## Major Features and Improvements
-
-* <INSERT MAJOR FEATURE HERE, USING MARKDOWN SYNTAX>
-* <IF RELEASE CONTAINS MULTIPLE FEATURES FROM SAME AREA, GROUP THEM TOGETHER>
-* A new module named `tf.experimental.numpy` is added, which is a NumPy-compatible API for writing TF programs. This module provides class `ndarray`, which mimics the `ndarray` class in NumPy, and wraps an immutable `tf.Tensor` under the hood. A subset of NumPy functions (e.g. `numpy.add`) are provided. Their inter-operation with TF facilities is seamless in most cases. See [tensorflow/python/ops/numpy_ops/README.md](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/numpy_ops/README.md) for details of what operations are supported and what are the differences from NumPy.
-* A major refactoring of the internals of the Keras Functional API has been completed, that should improve the reliability, stability, and performance of constructing Functional models.
-* Support for
-  [TensorFloat-32](https://blogs.nvidia.com/blog/2020/05/14/tensorfloat-32-precision-format/)
-  on Ampere based GPUs has been added. TensorFloat-32, or TF32 for short, is a
-  math mode for NVIDIA Ampere GPUs which causes certain float32 ops, such as
-  matrix multiplications and convolutions, to run much faster on Ampere GPUs but
-  with reduced precision. This reduced precision has not been found to effect
-  convergence quality of deep learning models in practice. TensorFloat-32 is
-  enabled by default, but can be disabled with
-  `tf.config.experimental.enable_tensor_float_32_execution`.
+  * `tf.data.experimental.service.WorkerServer` now takes a config tuple instead
+  of individual arguments. Usages should be updated to  `tf.data.experimental.service.WorkerServer(worker_config)`.
 
 * `tf.distribute`:
-  * `MultiWorkerMirroredStrategy` is graduated out of experimental.
-    * Peer failure will no longer cause the cluster to hang.
-    * Major issues with saving are fixed.
-    * See [Multi-worker training with Keras](https://www.tensorflow.org/tutorials/distribute/multi_worker_with_keras) for a tutorial.
-  * Deprecated `experimental_distribute_datasets_from_function` method and renamed it to `distribute_datasets_from_function` as it is no longer experimental.
-* The `tf.keras.mixed_precision` API has been made non-experimental. The major
-  changes to the new non-experimental API are:
-  * `tf.keras.mixed_precision.Policy` no longer takes in a
-    `tf.mixed_precision.experimental.LossScale` in the constructor, and no
-    longer has a `LossScale` associated with it. Instead, `Model.compile` will
-    automatically wrap the optimizer with a `LossScaleOptimizer` using dynamic
-    loss scaling if `Policy.name` is "mixed_float16".
-  * `tf.keras.mixed_precision.LossScaleOptimizer`'s constructor takes in
-    different arguments. In particular, it no longer takes in a `LossScale`, and
-    there is no longer a `LossScale` associated with the `LossScaleOptimizer`.
-    Instead, `LossScaleOptimizer` directly implements fixed or dynamic loss
-    scaling. See the documentation of
-    `tf.keras.mixed_precision.experimental.LossScaleOptimizer` for details on
-    the differences between the experimental `LossScaleOptimizer` and the new
-    non-experimental `LossScaleOptimizer`.
-  * `tf.mixed_precision.experimental.LossScale` and its subclasses are
-    deprecated, as all of its functionality now exists within
-    `tf.keras.mixed_precision.LossScaleOptimizer`
+  * Removes `tf.distribute.Strategy.experimental_make_numpy_dataset`. Please use
+  `tf.data.Dataset.from_tensor_slices` instead.
+  * Renames `experimental_hints` in `tf.distribute.StrategyExtended.reduce_to`,
+  `tf.distribute.StrategyExtended.batch_reduce_to`, `tf.distribute.ReplicaContext.all_reduce`
+  to `options`.
+  * Renames `tf.distribute.experimental.CollectiveHints` to `tf.distribute.experimental.CommunicationOptions`.
+  * Renames `tf.distribute.experimental.CollectiveCommunication` to `tf.distribute.experimental.CommunicationImplementation`.
+  * Renames `tf.distribute.Strategy.experimental_distribute_datasets_from_function` to `distribute_datasets_from_function` as it is no longer experimental.
+  * Removes `tf.distribute.Strategy.experimental_run_v2` method, which was deprecated in TF 2.2.
+
+* `tf.lite`:
+  * `tf.quantization.quantize_and_dequantize_v2` has been introduced, which updates the gradient definition for quantization which is outside the range
+     to be 0. To simulate the V1 behavior of `tf.quantization.quantize_and_dequantize(...)` use
+  `tf.grad_pass_through(tf.quantization.quantize_and_dequantize_v2)(...)`.
+
+* Building TensorFlow:
+  * Windows platform builds: TensorFlow on Windows under MSVC is now built with
+  `--copt=/experimental:preprocessor --host_copt=/experimental:preprocessor`
+  (see `.bazelrc` for more details). Builds including TensorFlow may fail with
+  unexpected syntax errors if these flags are absent. See also
+  [this thread on SIG Build](https://groups.google.com/a/tensorflow.org/g/build/c/LbAw8RILvTg/m/ttnuhYU2BgAJ).
+
+## Known Caveats
+  * `tf.keras.mixed_precision`
+    * When using mixed precision, calling `RMSprop.apply_gradients` or
+  `Nadam.apply_gradients` outside a `tf.function` does not work and will raise
+  the AttributeError "Tensor.op is meaningless when eager execution is enabled".
+  See this [issue](https://github.com/tensorflow/tensorflow/issues/45536) for details and a workaround.
 
 ## Bug Fixes and Other Changes
 
-*   <SIMILAR TO ABOVE SECTION, BUT FOR OTHER IMPORTANT CHANGES / BUG FIXES>
-*   <IF A CHANGE CLOSES A GITHUB ISSUE, IT SHOULD BE DOCUMENTED HERE>
-*   <NOTES SHOULD BE GROUPED PER AREA>
-*   Security:
-    *   Fixes an undefined behavior causing a segfault in `tf.raw_ops.Switch`
-        ([CVE-2020-15190](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15190))
-    *   Fixes three vulnerabilities in conversion to DLPack format
-        ([CVE-2020-15191](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15191),
-        [CVE-2020-15192](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15192),
-        [CVE-2020-15193](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15193))
-    *   Fixes two vulnerabilities in `SparseFillEmptyRowsGrad`
-        ([CVE-2020-15194](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15194),
-        [CVE-2020-15195](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15195))
-    *   Fixes several vulnerabilities in `RaggedCountSparseOutput` and
-        `SparseCountSparseOutput` operations
-        ([CVE-2020-15196](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15196),
-        [CVE-2020-15197](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15197),
-        [CVE-2020-15198](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15198),
-        [CVE-2020-15199](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15199),
-        [CVE-2020-15200](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15200),
-        [CVE-2020-15201](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15201))
-    *   Fixes an integer truncation vulnerability in code using the work sharder
-        API
-        ([CVE-2020-15202](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15202))
-    *   Fixes a format string vulnerability in `tf.strings.as_string`
-        ([CVE-2020-15203](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15203))
-    *   Fixes segfault raised by calling session-only ops in eager mode
-        ([CVE-2020-15204](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15204))
-    *   Fixes data leak and potential ASLR violation from
-        `tf.raw_ops.StringNGrams`
-        ([CVE-2020-15205](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15205))
-    *   Fixes segfaults caused by incomplete `SavedModel` validation
-        ([CVE-2020-15206](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15206))
-    *   Fixes a data corruption due to a bug in negative indexing support in
-        TFLite
-        ([CVE-2020-15207](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15207))
-    *   Fixes a data corruption due to dimension mismatch in TFLite
-        ([CVE-2020-15208](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15208))
-    *   Fixes several vulnerabilities in TFLite saved model format
-        ([CVE-2020-15209](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15209),
-        [CVE-2020-15210](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15210),
-        [CVE-2020-15211](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15211))
-    *   Fixes several vulnerabilities in TFLite implementation of segment sum
-        ([CVE-2020-15212](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15212),
-        [CVE-2020-15213](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15213),
-        [CVE-2020-15214](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15214))
-    *   Fixes a segfault in `tf.quantization.quantize_and_dequantize`
-        ([CVE-2020-15265](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15265))
-    *   Fixes an undefined behavior float cast causing a crash
-        ([CVE-2020-15266](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15266))
-*   TF Core:
-    *   `tf.types.experimental.TensorLike` is a new `Union` type that can be
-        used as type annotation for variables representing a Tensor or a value
-        that can be converted to Tensor by `tf.convert_to_tensor`.
-    *   Calling ops with a python constants or numpy values is now consistent
-        with tf.convert_to_tensor behavior. This avoids operations like
-        tf.reshape truncating inputs such as from int64 to int32.
-    *   Added `tf.sparse.map_values` to apply a function to the `.value`s of
-        `SparseTensor` arguments.
-    *   The Python bitwise operators for `Tensor` (`__and__`, `__or__`,
-        `__xor__` and `__invert__` now support non-`bool` arguments and apply
-        the corresponding bitwise ops. `bool` arguments continue to be supported
-        and dispatch to logical ops. This brings them more in line with Python
-        and NumPy behavior.
-    *   Added `tf.SparseTensor.with_values`. This returns a new SparseTensor
-        with the same sparsity pattern, but with new provided values. It is
-        similar to the `with_values` function of `RaggedTensor`.
-    *   Added `StatelessCase` op, and uses it if none of case branches has
-        stateful ops.
-    *   Added `tf.config.experimental.get_memory_usage` to return total memory
-        usage of the device.
-    *   Added gradients for `RaggedTensorToVariant` and `RaggedTensorFromVariant`.
-    *   Improve shape inference of nested function calls by supporting constant folding across Arg nodes which makes more static values available to shape inference functions.
-*   `tf.data`:
-    *   tf.data service:
-    *   Added new `tf.data.experimental.service.register_dataset` and
-        `tf.data.experimental.service.from_dataset_id` APIs to enable one
-        process to register a dataset with the tf.data service, and another
-        process to consume data from the dataset.
-    *   Added support for dispatcher fault tolerance. To enable fault tolerance,
-        configure a `work_dir` when running your dispatcher server and set
-        `dispatcher_fault_tolerance=True`. The dispatcher will store its state
-        to `work_dir`, so that on restart it can continue from its previous
-        state after restart.
-    *   Added support for sharing dataset graphs via shared filesystem instead
-        of over RPC. This reduces load on the dispatcher, improving performance
-        of distributing datasets. For this to work, the dispatcher's `work_dir`
-        must be accessible from workers. If the worker fails to read from the
-        `work_dir`, it falls back to using RPC for dataset graph transfer.
-    *   Added support for a new "distributed_epoch" processing mode. This
-        processing mode distributes a dataset across all tf.data workers,
-        instead of having each worker process the full dataset. See
-        [the tf.data service docs](https://www.tensorflow.org/api_docs/python/tf/data/experimental/service#understand_processing_mode)
-        to learn more.
-    *   Added optional `exclude_cols` parameter to CsvDataset. This parameter is
-        the complement of `select_cols`; at most one of these should be
-        specified.
-    *   We have implemented an optimization which reorders data-discarding
-        transformations such as `take` and `shard` to happen earlier in the
-        dataset when it is safe to do so. The optimization can be disabled via
-        the `experimental_optimization.reorder_data_discarding_ops` dataset
-        option.
-    *   `tf.data.Options` were previously immutable and can now be overridden.
-    *   `tf.data.Dataset.from_generator` now supports Ragged and Sparse tensors
-        with a new `output_signature` argument, which allows `from_generator` to
-        produce any type describable by a `tf.TypeSpec`.
-    *   `tf.data.experimental.AUTOTUNE` is now available in the core API as
-        `tf.data.AUTOTUNE`.
-*   `tf.image`:
-    *   Added deterministic `tf.image.stateless_random_*` functions for each
-        `tf.image.random_*` function. Added a new op
-        `stateless_sample_distorted_bounding_box` which is a deterministic
-        version of `sample_distorted_bounding_box` op. Given the same seed,
-        these stateless functions/ops produce the same results independent of
-        how many times the function is called, and independent of global seed
-        settings.
-*   `tf.distribute`:
-    *   (Experimental) Parameter server training:
-        *   Replaced the existing
-            `tf.distribute.experimental.ParameterServerStrategy` symbol with
-            a new class that is for parameter server training in TF2. Usage with
-            the old symbol, usually with Estimator, should be replaced with
-            `tf.compat.v1.distribute.experimental.ParameterServerStrategy`.
-        *   Added `tf.distribute.experimental.coordinator.*` namespace,
-            including the main API `ClusterCoordinator` for coordinating the
-            training cluster, the related data structure `RemoteValue`
-            and `PerWorkerValue`.
-*   `tf.keras`:
-    *   Improvements from the functional API refactoring:
-        *   Functional model construction does not need to maintain a global
-            workspace graph, removing memory leaks especially when building many
-            models or very large models.
-        *   Functional model construction should be ~8-10% faster on average.
-        *   Functional models can now contain non-symbolic values in their call
-            inputs inside of the first positional argument.
-        *   Several classes of TF ops that were not reliably converted to Keras
-            layers during functional API construction should now work, e.g.
-            `tf.image.ssim_multiscale`
-        *   Error messages when Functional API construction goes wrong (and when
-            ops cannot be converted to Keras layers automatically) should be
-            clearer and easier to understand.
-    *   `Optimizer.minimize` can now accept a loss `Tensor` and a `GradientTape`
-        as an alternative to accepting a `callable` loss.
-    *   Added `beta` hyperparameter to FTRL optimizer classes (Keras and others)
-        to match FTRL paper
-        (https://research.google.com/pubs/archive/41159.pdf).
-    *   Added `mobilenet_v3` to keras application model.
-    *   `Optimizer.__init__` now accepts a `gradient_aggregator` to allow for
-        customization of how gradients are aggregated across devices, as well as
-        `gradients_transformers` to allow for custom gradient transformations
-        (such as gradient clipping).
-    *   The `steps_per_execution` argument in `compile()` is no longer
-        experimental; if you were passing `experimental_steps_per_execution`,
-        rename it to `steps_per_execution` in your code. This argument controls
-        the number of batches to run during each `tf.function` call when calling
-        `fit()`. Running multiple batches inside a single `tf.function` call can
-        greatly improve performance on TPUs or small models with a large Python
-        overhead.
-    *   Improvements to Keras preprocessing layers:
-        *   TextVectorization can now accept a vocabulary list or file as an
-            init arg.
-        *   TextVectorization, StringLookup, and IntegerLookup can now accept a
-            vocabulary file via the `set_vocab_from_file` method.
-        *   Normalization can now accept mean and variance values as init args.
-    *   In `Attention` and `AdditiveAttention` layers, the `call()` method now
-        accepts a `return_attention_scores` argument. When set to
-        True, the layer returns the attention scores as an additional output
-        argument.
-    *   Added `tf.metrics.log_cosh` and `tf.metrics.logcosh` API entrypoints
-        with the same implementation as their `tf.losses` equivalent.
-    *   For Keras model, the individual call of `Model.evaluate` uses no cached
-        data for evaluation, while `Model.fit` uses cached data when
-        `validation_data` arg is provided for better performance.
-    *   Added a `save_traces` argument to `model.save`/
-        `tf.keras.models.save_model` which determines whether the SavedModel
-        format stores the Keras model/layer call functions. The traced functions
-        allow Keras to revive custom models and layers without the original
-        class definition, but if this isn't required the tracing can be
-        disabled with the added option.
-*   `tf.function` / AutoGraph:
-    *   Added `experimental_follow_type_hints` argument for `tf.function`. When
-        True, the function may use type annotations to optimize the tracing
-        performance.
-    *   Added support for `iter(DistributedDataset)` in AutoGraph `for` loops.
-    *   AutoGraph now allows creating new symbols inside a TensorFlow loop, if
-        the values of these symbols at an iteration does not depend on the
-        previous iteration. These types of loops must run at least one
-        iteration, and will raise a runtime error otherwise.
-    *   Variables contained in `tf.Module`s that are set as attributes of
-        custom Keras `Layer`s and `Model`s are now tracked in
-        the properties `layer.trainable_variables` and
-        `layer.non_trainable_variables`.
+### TF Core:
+  * Introduces experimental support for a new module named
+  [`tf.experimental.numpy`](https://www.tensorflow.org/api_docs/python/tf/experimental/numpy), which is a
+  NumPy-compatible API for writing TF programs. This module provides class
+  `ndarray`, which mimics the `ndarray` class in NumPy, and wraps an immutable
+  `tf.Tensor` under the hood. A subset of NumPy functions (e.g. `numpy.add`) are
+  provided. Their inter-operation with TF facilities is seamless in most cases.
+    See [tensorflow/python/ops/numpy_ops/README.md](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/numpy_ops/README.md)
+    for details of what operations are supported and what are the differences
+  from NumPy.
+  * `tf.types.experimental.TensorLike` is a new `Union` type that can be used as
+  type annotation for variables representing a Tensor or a value
+    that can be converted to Tensor by `tf.convert_to_tensor`.
+  * Calling ops with Python constants or NumPy values is now consistent with
+  tf.convert_to_tensor behavior. This avoids operations like
+    tf.reshape truncating inputs such as from int64 to int32.
+  * Adds `tf.sparse.map_values` to apply a function to the `.value`s of
+  `SparseTensor` arguments.
+  * The Python bitwise operators for `Tensor` (`__and__`, `__or__`, `__xor__` and `__invert__`) now support non-`bool`
+  arguments and apply the corresponding bitwise ops. `bool` arguments continue
+  to be supported and dispatch to logical ops. This brings them more in line with
+  Python and NumPy behavior.
+  * Adds `tf.SparseTensor.with_values`. This returns a new SparseTensor with the same sparsity pattern, but with new provided values. It is
+    similar to the `with_values` function of `RaggedTensor`.
+  * Adds `StatelessCase` op, and uses it if none of the case branches has stateful ops.
+  * Adds `tf.config.experimental.get_memory_usage` to return total memory usage of the device.
+  * Adds gradients for `RaggedTensorToVariant` and `RaggedTensorFromVariant`.
+  * Improve shape inference of nested function calls by supporting constant
+  folding across Arg nodes which makes more static values available to shape
+  inference functions.
+* `tf.debugging`:
+  * `tf.debugging.assert_shapes()` now works on `SparseTensor`s (Fixes [#36268](https://github.com/tensorflow/tensorflow/issues/36268)).
+* GPU
+  * Adds Support for [TensorFloat-32](https://blogs.nvidia.com/blog/2020/05/14/tensorfloat-32-precision-format/)
+  on Ampere based GPUs. TensorFloat-32, or TF32 for short, is a math mode for
+  NVIDIA Ampere based GPUs which causes certain float32 ops, such as matrix
+  multiplications and convolutions, to run much faster on Ampere GPUs but with
+  reduced precision. This reduced precision has not been found to affect
+  convergence quality of deep learning models in practice. TensorFloat-32 is
+  enabled by default, but can be disabled with `tf.config.experimental.enable_tensor_float_32_execution`.
+* `tf.math`:
+  * Adds `tf.math.erfcinv`, the inverse to `tf.math.erfc`.
+* `tf.nn`:
+  *   `tf.nn.max_pool2d` now supports explicit padding.
+* `tf.image`:
+  * Adds deterministic `tf.image.stateless_random_*` functions for each
+  `tf.image.random_*` function. Added a new op `stateless_sample_distorted_bounding_box`
+  which is a deterministic version of `sample_distorted_bounding_box` op.
+  Given the same seed, these stateless functions/ops produce the same results
+  independent of how many times the function is called, and independent of global seed settings.
+  * Adds deterministic `tf.image.resize` backprop CUDA kernels for
+  `method=ResizeMethod.BILINEAR` (the default method). Enable by setting the environment
+  variable `TF_DETERMINISTIC_OPS` to `"true"` or `"1"`.
+* `tf.print`:
+  * Bug fix in `tf.print()` with `OrderedDict` where if an `OrderedDict`
+  didn't have the keys sorted, the keys and values were not being printed
+    in accordance with their correct mapping.
+* `tf.train.Checkpoint`:
+  * Now accepts a `root` argument in the initialization, which generates a
+  checkpoint with a root object. This allows users to create a `Checkpoint`
+  object that is compatible with Keras `model.save_weights()` and
+  `model.load_weights`. The checkpoint is also compatible with the checkpoint
+  saved in the `variables/` folder in the SavedModel.
+  * When restoring, `save_path` can be a path to a SavedModel. The function will
+  automatically find the checkpoint in the SavedModel.
 
-    Example:
+### `tf.data`:
+  * Adds new `tf.data.experimental.service.register_dataset` and
+  `tf.data.experimental.service.from_dataset_id` APIs to enable one process to
+  register a dataset with the tf.data service, and another process to consume
+  data from the dataset.
+  * Adds support for dispatcher fault tolerance. To enable fault tolerance,
+  configure a `work_dir` when running your dispatcher server and set
+  `dispatcher_fault_tolerance=True`. The dispatcher will store its state to
+  `work_dir`, so that on restart it can continue from its previous state.
+  * Adds support for sharing dataset graphs via shared filesystem instead of
+  over RPC. This reduces load on the dispatcher, improving performance
+    of distributing datasets. For this to work, the dispatcher's `work_dir`
+  must be accessible from workers. If the worker fails to read from the `work_dir`,
+  it falls back to using RPC for dataset graph transfer.
+  * Adds support for a new "distributed_epoch" processing mode.
+  This processing mode distributes a dataset across all tf.data workers,
+    instead of having each worker process the full dataset. See
+  [the tf.data service docs](https://www.tensorflow.org/api_docs/python/tf/data/experimental/service#understand_processing_mode)
+  to learn more.
+  * Adds optional `exclude_cols` parameter to CsvDataset. This parameter is the
+  complement of `select_cols`; at most one of these should be specified.
+  * We have implemented an optimization which reorders data-discarding
+  transformations such as `take` and `shard` to happen earlier in the dataset
+  when it is safe to do so. The optimization can be disabled via the
+  `experimental_optimization.reorder_data_discarding_ops` dataset option.
+  * `tf.data.Options` were previously immutable and can now be overridden.
+  * `tf.data.Dataset.from_generator` now supports Ragged and Sparse tensors with
+  a new `output_signature` argument, which allows `from_generator` to produce any
+  type describable by a `tf.TypeSpec`.
+  * `tf.data.experimental.AUTOTUNE` is now available in the core API as `tf.data.AUTOTUNE`.
 
-    ```
-    for batch in data:
-      outputs = train_step(batch)
-    tf.print('final outputs', outputs)
-    ```
+### `tf.distribute`:
+  * Introduces experimental support for asynchronous training of models via
+  `tf.distribute.experimental.ParameterServerStrategy`:
+    * Replaces the existing `tf.distribute.experimental.ParameterServerStrategy`
+  symbol with a new class that is for parameter server training in TF2. Usage of
+  the old symbol, usually with Estimator API, should be **replaced** with
+  `tf.compat.v1.distribute.experimental.ParameterServerStrategy`.
+    * Added `tf.distribute.experimental.coordinator.*` namespace, including the
+  main API `ClusterCoordinator` for coordinating the training cluster, the
+  related data structure `RemoteValue` and `PerWorkerValue`.
+  * [`MultiWorkerMirroredStrategy`](https://www.tensorflow.org/api_docs/python/tf/distribute/MultiWorkerMirroredStrategy)
+  is now a stable API and is no longer considered experimental. Some of the major
+  improvements involve handling peer failure and many bug fixes. Please check out
+  the detailed tutorial on [Multi-worker training with Keras](https://www.tensorflow.org/tutorials/distribute/multi_worker_with_keras).
+  * Adds `tf.distribute.Strategy.gather` and `tf.distribute.ReplicaContext.all_gather`
+  APIs to support gathering dense distributed values.
+  * Fixes various issues with saving a distributed model.
 
-    See tensorflow/python/autograph/g3doc/reference/limitations.md for more
-    info.
+### `tf.keras`:
+  * Improvements from the Functional API refactoring:
+    * Functional model construction does not need to maintain a global workspace
+  graph, removing memory leaks especially when building many models or very large models.
+    * Functional model construction should be ~8-10% faster on average.
+    * Functional models can now contain non-symbolic values in their call inputs
+  inside of the first positional argument.
+    * Several classes of TF ops that were not reliably converted to Keras layers
+  during functional API construction should now work, e.g. `tf.image.ssim_multiscale`.
+    * Error messages when Functional API construction goes wrong (and when ops cannot be converted to Keras layers automatically) should be
+      clearer and easier to understand.
+  * `Optimizer.minimize` can now accept a loss `Tensor` and a `GradientTape`
+  as an alternative to accepting a `callable` loss.
+  * Adds `beta` hyperparameter to [FTRL](https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Ftrl)
+  optimizer classes (Keras and others) to match [FTRL paper](https://research.google.com/pubs/archive/41159.pdf).
+  * `Optimizer.__init__` now accepts a `gradient_aggregator` to allow for customization
+  of how gradients are aggregated across devices, as well as `gradients_transformers`
+  to allow for custom gradient transformations (such as gradient clipping).
+  * Improvements to Keras preprocessing layers:
+    * TextVectorization can now accept a vocabulary list or file as an init arg.
+    * Normalization can now accept mean and variance values as init args.
+  * In `Attention` and `AdditiveAttention` layers, the `call()` method now accepts a `return_attention_scores` argument. When set to
+    True, the layer returns the attention scores as an additional output argument.
+  * Adds `tf.metrics.log_cosh` and `tf.metrics.logcosh` API entrypoints with the
+  same implementation as their `tf.losses` equivalent.
+  * For Keras model, the individual call of `Model.evaluate` uses no cached data
+  for evaluation, while `Model.fit` uses cached data when `validation_data` arg
+  is provided for better performance.
+  * Adds a `save_traces` argument to `model.save`/ `tf.keras.models.save_model`
+  which determines whether the SavedModel format stores the Keras model/layer call
+  functions. The traced functions allow Keras to revive custom models and layers
+  without the original class definition, but if this isn't required the tracing
+  can be disabled with the added option.
+  * The `tf.keras.mixed_precision` API is now non-experimental.
+  The non-experimental API differs from the experimental API in several ways.
+    * `tf.keras.mixed_precision.Policy` no longer takes in a
+  `tf.mixed_precision.experimental.LossScale` in the constructor, and no longer has a `LossScale`
+  associated with it. Instead, `Model.compile` will automatically wrap the optimizer
+  with a `LossScaleOptimizer` using dynamic loss scaling if `Policy.name`
+  is "mixed_float16".
+    * `tf.keras.mixed_precision.LossScaleOptimizer`'s constructor takes in different
+  arguments. In particular, it no longer takes in a `LossScale`, and there is
+  no longer a `LossScale` associated with the `LossScaleOptimizer`. Instead,
+  `LossScaleOptimizer` directly implements fixed or dynamic loss scaling. See the
+  documentation of
+  [`tf.keras.mixed_precision.experimental.LossScaleOptimizer`](https://www.tensorflow.org/api_docs/python/tf/keras/mixed_precision/experimental/LossScaleOptimizer?version=nightly)
+  for details on the differences between the experimental `LossScaleOptimizer`
+  and the new non-experimental `LossScaleOptimizer`.
+    * `tf.mixed_precision.experimental.LossScale` and its subclasses are
+  deprecated, as all of their functionality now exists within `tf.keras.mixed_precision.LossScaleOptimizer`.
 
-*   `tf.lite`:
+### `tf.lite`:
+  * `TFLiteConverter`:
+    * Support optional flags `inference_input_type` and `inference_output_type`
+  for full integer quantized models. This allows users to modify the model input
+  and output type to integer types (`tf.int8`, `tf.uint8`) instead of defaulting
+  to float type (`tf.float32`).
+  * NNAPI
+    * Adds NNAPI Delegation support for requantization use cases by converting
+  the operation into a dequantize-quantize pair.
+    * Removes deprecated `Interpreter.setUseNNAPI(boolean)` Java API. Use
+  `Interpreter.Options.setUseNNAPI` instead.
+    * Deprecates `Interpreter::UseNNAPI(bool)` C++ API. Use `NnApiDelegate()`
+  and related delegate configuration methods directly.
+    * Deprecates `Interpreter::SetAllowFp16PrecisionForFp32(bool)` C++ API.
+  Prefer controlling this via delegate options, e.g. `tflite::StatefulNnApiDelegate::Options::allow_fp16`
+  or `TfLiteGpuDelegateOptionsV2::is_precision_loss_allowed`.
+  * GPU
+    * GPU acceleration now supports quantized models by default
+  * `DynamicBuffer::AddJoinedString()` will now add a separator if the first string to be joined is empty.
+  *  Adds support for cumulative sum (cumsum), both as builtin op and MLIR conversion.
 
-    *   `TFLiteConverter`:
-        *   Support optional flags `inference_input_type` and
-            `inference_output_type` for full integer quantized models. This
-            allows users to modify the model input and output type to integer
-            types (`tf.int8`, `tf.uint8`) instead of defaulting to float type
-            (`tf.float32`).
-    *   TFLite Profiler for Android is available. See the detailed
-        [guide](https://www.tensorflow.org/lite/performance/measurement#trace_tensorflow_lite_internals_in_android).
-    * NNAPI
-        *   Added NNAPI Delegation support for requantization use cases by
-            converting the operation into a dequantize-quantize pair.
-        *   Removed deprecated `Interpreter.setUseNNAPI(boolean)` Java API.
-            *   Use `Interpreter.Options.setUseNNAPI` instead.
-        *   Deprecate `Interpreter::UseNNAPI(bool)` C++ API.
-            *   Use `NnApiDelegate()` and related delegate configuration methods
-                directly.
-        *   Deprecate `Interpreter::SetAllowFp16PrecisionForFp32(bool)` C++ API
-            *   Prefer controlling this via delegate options, e.g.
-                `tflite::StatefulNnApiDelegate::Options::allow_fp16' or
-                `TfLiteGpuDelegateOptionsV2::is_precision_loss_allowed`.
-    *   `DynamicBuffer::AddJoinedString()` will now add a separator if the first
-        string to be joined is empty.
-    *  Added support for cumulative sum (cumsum), both as builtin op and MLIR conversion.
-    *   <ADD RELEASE NOTES HERE>
+### `TensorRT`
+  * Issues a warning when the `session_config` parameter for the TF1 converter
+  is used or the `rewrite_config_template` field in the TF2 converter parameter
+  object is used.
 
-*   `tf.random`:
+### TPU Enhancements:
+  * Adds support for the `beta` parameter of the FTRL optimizer for TPU
+  embeddings. Users of other TensorFlow platforms can implement equivalent
+  behavior by adjusting the `l2` parameter.
 
-    *   <ADD RELEASE NOTES HERE>
+### XLA Support:
+  * `xla.experimental.compile` is deprecated; use `tf.function(experimental_compile=True)` instead.
+  * Adds `tf.function.experimental_get_compiler_ir` which returns compiler IR
+  (currently 'hlo' and 'optimized_hlo') for given input for given function.
 
-*   Math and Linear Algebra:
+### Security:
+  * Fixes an undefined behavior causing a segfault in `tf.raw_ops.Switch`,
+  ([CVE-2020-15190](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15190))
+  * Fixes three vulnerabilities in conversion to DLPack format
+    * [CVE-2020-15191](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15191),
+    * [CVE-2020-15192](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15192),
+    * [CVE-2020-15193](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15193)
+  * Fixes two vulnerabilities in `SparseFillEmptyRowsGrad`
+    * [CVE-2020-15194](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15194),
+    * [CVE-2020-15195](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15195)
+  * Fixes several vulnerabilities in `RaggedCountSparseOutput` and `SparseCountSparseOutput` operations
+    * [CVE-2020-15196](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15196),
+    * [CVE-2020-15197](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15197),
+    * [CVE-2020-15198](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15198),
+    * [CVE-2020-15199](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15199),
+    * [CVE-2020-15200](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15200),
+    * [CVE-2020-15201](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15201)
+  * Fixes an integer truncation vulnerability in code using the work sharder API,
+  ([CVE-2020-15202](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15202))
+  * Fixes a format string vulnerability in `tf.strings.as_string`,
+  ([CVE-2020-15203](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15203))
+  * Fixes segfault raised by calling session-only ops in eager mode,
+  ([CVE-2020-15204](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15204))
+  * Fixes data leak and potential ASLR violation from `tf.raw_ops.StringNGrams`,
+  ([CVE-2020-15205](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15205))
+  * Fixes segfaults caused by incomplete `SavedModel` validation,
+  ([CVE-2020-15206](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15206))
+  * Fixes a data corruption due to a bug in negative indexing support in TFLite,
+  ([CVE-2020-15207](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15207))
+  * Fixes a data corruption due to dimension mismatch in TFLite,
+  ([CVE-2020-15208](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15208))
+  * Fixes several vulnerabilities in TFLite saved model format
+    * [CVE-2020-15209](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15209),
+    * [CVE-2020-15210](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15210),
+    * [CVE-2020-15211](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15211)
+  * Fixes several vulnerabilities in TFLite implementation of segment sum
+    * [CVE-2020-15212](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15212),
+    * [CVE-2020-15213](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15213),
+    * [CVE-2020-15214](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15214)
+  * Fixes a segfault in `tf.quantization.quantize_and_dequantize`,
+  ([CVE-2020-15265](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15265))
+  * Fixes an undefined behavior float cast causing a crash,
+  ([CVE-2020-15266](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-15266))
+  * Fixes a lack of validation in `tf.raw_ops.DataFormatVecPermute` and
+  `tf.raw_ops.DataFormatDimMap` which can cause uninitialized memory access,
+  read outside bounds of arrays, data corruption and segmentation faults
+  ([CVE-2020-26267](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-26267))
+  * Fixes a crash caused by writing to read only memory region
+  ([CVE-2020-26268](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-26268))
+  * Fixes a heap out of bounds access in filesystem globbing implementation
+  ([CVE-2020-26269](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-26269))
 
-    * Add `tf.math.erfcinv`, the inverse to `tf.math.erfc`.
-
-*   TPU Enhancements:
-
-    *   Added support for the `beta` parameter of the FTRL optimizer for TPU
-        embeddings. Users of other TensorFlow platforms can implement equivalent
-        behavior by adjusting the `l2` parameter.
-    *   <ADD RELEASE NOTES HERE>
-
-*   XLA Support:
-
-    *   xla.experimental.compile is deprecated, use
-        `tf.function(experimental_compile=True)` instead
-    *   Added `tf.function.experimental_get_compiler_ir` which returns compiler
-        IR (currently 'hlo' and 'optimized_hlo') for given input for given
-        function.
-    *   <ADD RELEASE NOTES HERE>
-
-*   Tracing and Debugging:
-
-    *   <ADD RELEASE NOTES HERE>
-
-*   `tf.train.Checkpoint`:
-
-    *   Now accepts a `root` argument in the initialization, which generates a
-        checkpoint with a root object. This allows users to create a
-        `Checkpoint` object that is compatible with Keras `model.save_weights()`
-        and `model.load_weights`. The checkpoint is also compatible with the
-        checkpoint saved in the `variables/` folder in the SavedModel.
-    *   When restoring, `save_path` can be a path to a SavedModel. The function
-        will automatically find the checkpoint in the SavedModel.
-
-*   `tf.nn`:
-
-    *   `tf.nn.max_pool2d` now supports explicit padding.
-
-*   `tf.debugging`:
-
-    *   `tf.debugging.assert_shapes()` now works on `SparseTensor`s (#36268).
-
-*   `tf.print`:
-
-    *   Bug fix in `tf.print()` with `OrderedDict` where if an `OrderedDict`
-        didn't have the keys sorted, the keys and values were not being printed
-        in accordance with their correct mapping.
-
-*    `TensorRT`
-
-    *   We now issue a warning when the `session_config` parameter for the TF1
-        converter is used or the `rewrite_config_template` field in the TF2
-        converter parameter object is used.
-
-*   Other:
-
-    *   We have replaced uses of "whitelist" and "blacklist" with "allowlist"
-        and "denylist" where possible. Please see
-        https://developers.google.com/style/word-list#blacklist for more
-        context.
-    *   Add `tf.config.experimental.mlir_bridge_rollout` which will help us
-        rollout the new MLIR TPU bridge.
-    *   Added `tf.experimental.register_filesystem_plugin` to load modular
-        filesystem plugins from Python
-    *   <ADD RELEASE NOTES HERE>
+### Other:
+  * We have replaced uses of "whitelist" and "blacklist" with "allowlist" and
+  "denylist" where possible. Please see [this list](https://developers.google.com/style/word-list#blacklist) for more context.
+  * Adds `tf.config.experimental.mlir_bridge_rollout` which will help us roll out the new MLIR TPU bridge.
+  * Adds `tf.experimental.register_filesystem_plugin` to load modular filesystem plugins from Python.
 
 ## Thanks to our Contributors
 
-This release contains contributions from many people at Google, as well as:
+This release contains contributions from many people at Google as well as the following external contributors:
 
-stjohnso98, <NAME>, <HERE>, <USING>, <GITHUB>, <HANDLE>
+8bitmp3, aaa.jq, Abhineet Choudhary, Abolfazl Shahbazi, acxz, Adam Hillier, Adrian Garcia Badaracco, Ag Ramesh, ahmedsabie, Alan Anderson, Alexander Grund, Alexandre Lissy, Alexey Ivanov, Amedeo Cavallo, anencore94, Aniket Kumar Singh, Anthony Platanios, Ashwin Phadke, Balint Cristian, Basit Ayantunde, bbbboom, Ben Barsdell, Benjamin Chetioui, Benjamin Peterson, bhack, Bhanu Prakash Bandaru Venkata, Biagio Montaruli, Brent M. Spell, bubblebooy, bzhao, cfRod, Cheng Chen, Cheng(Kit) Chen, Chris Tessum, Christian, chuanqiw, codeadmin_peritiae, COTASPAR, CuiYifeng, danielknobe, danielyou0230, dannyfriar, daria, DarrenZhang01, Denisa Roberts, dependabot[bot], Deven Desai, Dmitry Volodin, Dmitry Zakharov, drebain, Duncan Riach, Eduard Feicho, Ehsan Toosi, Elena Zhelezina, emlaprise2358, Eugene Kuznetsov, Evaderan-Lab, Evgeniy Polyakov, Fausto Morales, Felix Johnny, fo40225, Frederic Bastien, Fredrik Knutsson, fsx950223, Gaurav Singh, Gauri1 Deshpande, George Grzegorz Pawelczak, gerbauz, Gianluca Baratti, Giorgio Arena, Gmc2, Guozhong Zhuang, Hannes Achleitner, Harirai, HarisWang, Harsh188, hedgehog91, Hemal Mamtora, Hideto Ueno, Hugh Ku, Ian Beauregard, Ilya Persky, jacco, Jakub Beránek, Jan Jongboom, Javier Montalt Tordera, Jens Elofsson, Jerry Shih, jerryyin, jgehw, Jinjing Zhou, jma, jmsmdy, Johan Nordström, John Poole, Jonah Kohn, Jonathan Dekhtiar, jpodivin, Jung Daun, Kai Katsumata, Kaixi Hou, Kamil Rakoczy, Kaustubh Maske Patil, Kazuaki Ishizaki, Kedar Sovani, Koan-Sin Tan, Koki Ibukuro, Krzysztof Laskowski, Kushagra Sharma, Kushan Ahmadian, Lakshay Tokas, Leicong Li, levinxo, Lukas Geiger, Maderator, Mahmoud Abuzaina, Mao Yunfei, Marius Brehler, markf, Martin Hwasser, Martin Kubovčík, Matt Conley, Matthias, mazharul, mdfaijul, Michael137, MichelBr, Mikhail Startsev, Milan Straka, Ml-0, Myung-Hyun Kim, Måns Nilsson, Nathan Luehr, ngc92, nikochiko, Niranjan Hasabnis, nyagato_00, Oceania2018, Oleg Guba, Ongun Kanat, OscarVanL, Patrik Laurell, Paul Tanger, Peter 
Sobot, Phil Pearl, PlusPlusUltra, Poedator, Prasad Nikam, Rahul-Kamat, Rajeshwar Reddy T, redwrasse, Rickard, Robert Szczepanski, Rohan Lekhwani, Sam Holt, Sami Kama, Samuel Holt, Sandeep Giri, sboshin, Sean Settle, settle, Sharada Shiddibhavi, Shawn Presser, ShengYang1, Shi,Guangyong, Shuxiang Gao, Sicong Li, Sidong-Wei, Srihari Humbarwadi, Srinivasan Narayanamoorthy, Steenu Johnson, Steven Clarkson, stjohnso98, Tamas Bela Feher, Tamas Nyiri, Tarandeep Singh, Teng Lu, Thibaut Goetghebuer-Planchon, Tim Bradley, Tomasz Strejczek, Tongzhou Wang, Torsten Rudolf, Trent Lo, Ty Mick, Tzu-Wei Sung, Varghese, Jojimon, Vignesh Kothapalli, Vishakha Agrawal, Vividha, Vladimir Menshakov, Vladimir Silyaev, VoVAllen, Võ Văn Nghĩa, wondertx, xiaohong1031, Xiaoming (Jason) Cui, Xinan Jiang, Yair Ehrenwald, Yasir Modak, Yasuhiro Matsumoto, Yimei Sun, Yiwen Li, Yixing, Yoav Ramon, Yong Tang, Yong Wu, yuanbopeng, Yunmo Koo, Zhangqiang, Zhou Peng, ZhuBaohe, zilinzhu, zmx
 
 
 # Release 2.3.1

From cae2f149e1f3dacfd993de89fa2e897e7790b4e7 Mon Sep 17 00:00:00 2001
From: "T.J. Alumbaugh" <talumbau@google.com>
Date: Mon, 14 Dec 2020 12:42:42 -0800
Subject: [PATCH 55/60] Add function to check for deprecated paths via weak
 symbols. As a first step, define the function and link with
 CpuBackendContext.

PiperOrigin-RevId: 347446604
Change-Id: Ib7fb6820daa18194dd54081d233c8ae960bda5f1
---
 tensorflow/lite/kernels/BUILD                 | 14 +++++++
 .../lite/kernels/cpu_backend_context.cc       | 20 +++++++++-
 tensorflow/lite/kernels/cpu_backend_context.h |  9 +++++
 tensorflow/lite/kernels/cpu_backend_gemm.h    | 17 +++-----
 .../lite/kernels/cpu_backend_gemm_test.cc     |  2 +-
 .../lite/kernels/cpu_backend_gemm_x86.h       | 39 +++++++------------
 .../lite/kernels/deprecated_backends.cc       | 24 ++++++++++++
 7 files changed, 87 insertions(+), 38 deletions(-)
 create mode 100644 tensorflow/lite/kernels/deprecated_backends.cc

diff --git a/tensorflow/lite/kernels/BUILD b/tensorflow/lite/kernels/BUILD
index e0ab2724c5f..a0808aafc80 100644
--- a/tensorflow/lite/kernels/BUILD
+++ b/tensorflow/lite/kernels/BUILD
@@ -320,6 +320,18 @@ cc_library(
     }),
 )
 
+# Provide a library for clients to link to if they need to stay on deprecated
+# arithmetic backends. Include as a dependency of cpu_backend_context to start.
+# TODO(b/168923364): Move to dependent targets.
+cc_library(
+    name = "deprecated_backends",
+    srcs = [
+        "deprecated_backends.cc",
+    ],
+    compatible_with = get_compatible_with_portable(),
+    alwayslink = 1,
+)
+
 cc_library(
     name = "cpu_backend_context",
     srcs = [
@@ -337,6 +349,7 @@ cc_library(
         "//conditions:default": ["-DTFLITE_HAVE_CPUINFO"],
     }),
     deps = [
+        ":deprecated_backends",  # TODO(b/168923364): Move to dependent targets.
         ":tflite_with_ruy",
         ":op_macros",
         # For now this unconditionally depends on both ruy and gemmlowp.
@@ -345,6 +358,7 @@ cc_library(
         "@ruy//ruy:context",
         "@gemmlowp",
         "//tensorflow/lite/c:common",
+        "//tensorflow/lite:macros",
         "//tensorflow/lite:external_cpu_backend_context",
         "//tensorflow/lite/kernels/internal:compatibility",
     ] + select({
diff --git a/tensorflow/lite/kernels/cpu_backend_context.cc b/tensorflow/lite/kernels/cpu_backend_context.cc
index 6eacb3d2216..c8d658e9005 100644
--- a/tensorflow/lite/kernels/cpu_backend_context.cc
+++ b/tensorflow/lite/kernels/cpu_backend_context.cc
@@ -24,6 +24,7 @@ limitations under the License.
 #include "public/gemmlowp.h"
 #include "ruy/context.h"  // from @ruy
 #include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/core/macros.h"
 #include "tensorflow/lite/external_cpu_backend_context.h"
 #include "tensorflow/lite/kernels/internal/compatibility.h"
 #include "tensorflow/lite/kernels/op_macros.h"
@@ -35,7 +36,13 @@ const int kDefaultNumThreadpoolThreads = 1;
 
 namespace tflite {
 
-#ifdef TFLITE_HAVE_CPUINFO
+// Weakly declared so that linking a definition opts in to deprecated paths.
+#if TFLITE_HAS_ATTRIBUTE_WEAK && !defined(__APPLE__)
+extern TFLITE_ATTRIBUTE_WEAK bool UseGemmlowpOnX86();
+#endif  // TFLITE_HAS_ATTRIBUTE_WEAK && !defined(__APPLE__)
+
+// TODO(b/138922878) Enable when Ruy builds on Apple.
+#if defined(TFLITE_HAVE_CPUINFO) && !defined(__APPLE__)
 CpuBackendContext::CpuInfo::~CpuInfo() {
   if (init_status_ == InitStatus::kInitialized) {
     cpuinfo_deinitialize();
@@ -144,4 +151,15 @@ bool CpuBackendContext::HasAvxOrAbove() {
   return cpuinfo_.Avx() || cpuinfo_.Avx2Fma() || cpuinfo_.Avx512();
 }
 
+bool CpuBackendContext::PreferGemmlowpOnX86() {
+#if defined(TFLITE_X86_PLATFORM) && TFLITE_HAS_ATTRIBUTE_WEAK && \
+    !defined(__APPLE__)
+  // The weak symbol's address is null unless a definition was linked in.
+  if (::tflite::UseGemmlowpOnX86 != nullptr && ::tflite::UseGemmlowpOnX86()) {
+    return true;
+  }
+#endif  // TFLITE_X86_PLATFORM && TFLITE_HAS_ATTRIBUTE_WEAK && !(__APPLE__)
+  return !HasAvxOrAbove();
+}
+
 }  // namespace tflite
diff --git a/tensorflow/lite/kernels/cpu_backend_context.h b/tensorflow/lite/kernels/cpu_backend_context.h
index e0207176eb4..eda2712086d 100644
--- a/tensorflow/lite/kernels/cpu_backend_context.h
+++ b/tensorflow/lite/kernels/cpu_backend_context.h
@@ -16,6 +16,11 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_CPU_BACKEND_CONTEXT_H_
 #define TENSORFLOW_LITE_KERNELS_CPU_BACKEND_CONTEXT_H_
 
+#if (defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || \
+     defined(_M_X64))
+#define TFLITE_X86_PLATFORM
+#endif
+
 #include <memory>
 
 #include "public/gemmlowp.h"
@@ -52,6 +57,10 @@ class CpuBackendContext final : public TfLiteInternalBackendContext {
 
   bool HasAvxOrAbove();
 
+  // Gemmlowp on x86 is a deprecated path but some clients may still use
+  // this path based on link time dependencies.
+  bool PreferGemmlowpOnX86();
+
  private:
   // Copy the wrapper class for cpuinfo from Ruy.
   class CpuInfo final {
diff --git a/tensorflow/lite/kernels/cpu_backend_gemm.h b/tensorflow/lite/kernels/cpu_backend_gemm.h
index 6950e182dfa..9c687f6466b 100644
--- a/tensorflow/lite/kernels/cpu_backend_gemm.h
+++ b/tensorflow/lite/kernels/cpu_backend_gemm.h
@@ -50,14 +50,7 @@ namespace cpu_backend_gemm {
 //  ENABLED && (AVX
 //  or above available)
 
-
-#if (defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || \
-     defined(_M_X64))
-#define TFLITE_X86_PLATFORM
-#endif
-
-// TODO(b/168923364)  Set TFLITE_X86_RUY_ENABLED default 'on' when ready.
-#if defined(TFLITE_X86_PLATFORM) && defined(TFLITE_X86_RUY_ENABLED)
+#if !defined(TFLITE_WITH_RUY) && defined(TFLITE_X86_PLATFORM)
 /* GEMM dispatch implementation for x86.
  */
 template <typename LhsScalar, typename RhsScalar, typename AccumScalar,
@@ -72,12 +65,10 @@ template <typename LhsScalar, typename RhsScalar, typename AccumScalar,
           typename DstScalar, QuantizationFlavor quantization_flavor>
 struct GemmImpl : detail::GemmImplUsingRuy<LhsScalar, RhsScalar, AccumScalar,
                                            DstScalar, quantization_flavor> {};
-#endif
 
-#if !defined(TFLITE_WITH_RUY) && !defined(TFLITE_X86_RUY_ENABLED)
+#if !defined(TFLITE_WITH_RUY)
 
 /* Specializations using gemmlowp */
-
 template <typename SrcScalar, typename DstScalar,
           QuantizationFlavor quantization_flavor>
 struct GemmImpl<SrcScalar, SrcScalar, std::int32_t, DstScalar,
@@ -114,7 +105,9 @@ template <>
 struct GemmImpl<float, float, float, float, QuantizationFlavor::kFloatingPoint>
     : detail::GemmImplUsingEigen {};
 
-#endif  // not TFLITE_WITH_RUY && not TFLITE_X86_RUY_ENABLED
+#endif  // not TFLITE_WITH_RUY
+
+#endif  // not TFLITE_WITH_RUY and TFLITE_X86_PLATFORM
 
 /* Public entry point */
 
diff --git a/tensorflow/lite/kernels/cpu_backend_gemm_test.cc b/tensorflow/lite/kernels/cpu_backend_gemm_test.cc
index 521e7bb03fd..06bc7a05b7b 100644
--- a/tensorflow/lite/kernels/cpu_backend_gemm_test.cc
+++ b/tensorflow/lite/kernels/cpu_backend_gemm_test.cc
@@ -297,7 +297,7 @@ void PerformGemmThenCompareResultsThenAgainWithClamping(
 // done so far. Until that is done, the best that we can do is to search for
 // a good exponent value by trial-and-error. This is expensive, as each try
 // requires computing a whole GEMM. This is thus probably a major contribution
-// to the overall latency of this tesat. To partially mitigate that,
+// to the overall latency of this test. To partially mitigate that,
 // we use a bisection to reduce the required number of tries.
 //
 // This function is recursive. The bisect_min and bisect_max arguments
diff --git a/tensorflow/lite/kernels/cpu_backend_gemm_x86.h b/tensorflow/lite/kernels/cpu_backend_gemm_x86.h
index 20af9536d47..39d37c7a0c3 100644
--- a/tensorflow/lite/kernels/cpu_backend_gemm_x86.h
+++ b/tensorflow/lite/kernels/cpu_backend_gemm_x86.h
@@ -41,25 +41,27 @@ struct GemmImplX86 {
       const MatrixParams<DstScalar>& dst_params, DstScalar* dst_data,
       const GemmParams<AccumScalar, DstScalar, quantization_flavor>& params,
       CpuBackendContext* context) {
-    // Run-time dispatch to Ruy for platforms with AVX or above.
-    if (context->HasAvxOrAbove()) {
-      detail::GemmImplUsingRuy<LhsScalar, RhsScalar, AccumScalar, DstScalar,
-                               quantization_flavor>::Run(lhs_params, lhs_data,
-                                                         rhs_params, rhs_data,
-                                                         dst_params, dst_data,
-                                                         params, context);
-    } else {
-      // Dispatch to gemmlowp for SSE.
+    // TODO(b/168923364) Ruy is preferred on x86, but check if the deprecated
+    // path is enabled.
+    if (context->PreferGemmlowpOnX86()) {
+      // Dispatch to gemmlowp.
       detail::GemmImplUsingGemmlowp<
           LhsScalar, RhsScalar, AccumScalar, DstScalar,
           quantization_flavor>::Run(lhs_params, lhs_data, rhs_params, rhs_data,
                                     dst_params, dst_data, params, context);
+
+      return;
     }
+    // Run-time dispatch to Ruy for platforms with AVX or above.
+    detail::GemmImplUsingRuy<LhsScalar, RhsScalar, AccumScalar, DstScalar,
+                             quantization_flavor>::Run(lhs_params, lhs_data,
+                                                       rhs_params, rhs_data,
+                                                       dst_params, dst_data,
+                                                       params, context);
   }
 };
 
-// For float, again prefer Ruy in all cases, but defer to eigen if no flavor of
-// AVX is present.
+// For float, defer to eigen for now.
 template <>
 struct GemmImplX86<float, float, float, float,
                    QuantizationFlavor::kFloatingPoint> {
@@ -69,19 +71,8 @@ struct GemmImplX86<float, float, float, float,
                   const GemmParams<float, float,
                                    QuantizationFlavor::kFloatingPoint>& params,
                   CpuBackendContext* context) {
-    // Run-time dispatch to Ruy for platforms with AVX or above.
-    if (context->HasAvxOrAbove()) {
-      detail::GemmImplUsingRuy<
-          float, float, float, float,
-          QuantizationFlavor::kFloatingPoint>::Run(lhs_params, lhs_data,
-                                                   rhs_params, rhs_data,
-                                                   dst_params, dst_data, params,
-                                                   context);
-    } else {
-      // Dispatch to gemmlowp for SSE.
-      GemmImplUsingEigen::Run(lhs_params, lhs_data, rhs_params, rhs_data,
-                              dst_params, dst_data, params, context);
-    }
+    GemmImplUsingEigen::Run(lhs_params, lhs_data, rhs_params, rhs_data,
+                            dst_params, dst_data, params, context);
   }
 };
 
diff --git a/tensorflow/lite/kernels/deprecated_backends.cc b/tensorflow/lite/kernels/deprecated_backends.cc
new file mode 100644
index 00000000000..56886533e07
--- /dev/null
+++ b/tensorflow/lite/kernels/deprecated_backends.cc
@@ -0,0 +1,24 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+namespace tflite {
+
+// Definition of the symbol that CpuBackendContext declares as "weak".
+// Depending on this target links the definition in, which is how a client
+// asks CpuBackendContext to keep executing the deprecated gemmlowp path.
+// Always returns true.
+bool UseGemmlowpOnX86() { return true; }
+
+}  // namespace tflite

From df96df98624b3973d45bebd5ed7297469a5addb4 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 14 Dec 2020 12:53:30 -0800
Subject: [PATCH 56/60] Add unistd.h required for getuid().

Without this, we get the following build failure:
```
tensorflow/tools/graph_transforms/transform_graph.cc: In function 'std::__cxx11::string tensorflow::graph_transforms::ExpandPath(const string&)':
tensorflow/tools/graph_transforms/transform_graph.cc:150:36: error: 'getuid' was not declared in this scope
       struct passwd* pw = getpwuid(getuid());
                                    ^~~~~~
tensorflow/tools/graph_transforms/transform_graph.cc:150:36: note: suggested alternative: 'getpwuid'
       struct passwd* pw = getpwuid(getuid());
                                    ^~~~~~
                                    getpwuid
```

PiperOrigin-RevId: 347448636
Change-Id: I7ef9efaa2edb5dbb88529e6c76357ec0057b87ea
---
 tensorflow/tools/graph_transforms/transform_graph.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tensorflow/tools/graph_transforms/transform_graph.cc b/tensorflow/tools/graph_transforms/transform_graph.cc
index 5b9fa84cc15..a004d7f789b 100644
--- a/tensorflow/tools/graph_transforms/transform_graph.cc
+++ b/tensorflow/tools/graph_transforms/transform_graph.cc
@@ -26,6 +26,7 @@ limitations under the License.
 #include "tensorflow/tools/graph_transforms/transform_utils.h"
 #if !defined(PLATFORM_WINDOWS)
 #include <pwd.h>
+#include <unistd.h>
 #endif
 
 namespace tensorflow {

From c025ecf14633357a047d98cb1b55f9d24c8a85f3 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Mon, 14 Dec 2020 13:29:19 -0800
Subject: [PATCH 57/60] Stop using
 grpc::internal::ClientAsyncResponseReaderFactory (deprecated)

PiperOrigin-RevId: 347456833
Change-Id: I8943183b8dac5e80fcffc8faeddeec1267a1308c
---
 .../tpu/kernels/tpu_compilation_cache_grpc.cc     | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/tensorflow/core/tpu/kernels/tpu_compilation_cache_grpc.cc b/tensorflow/core/tpu/kernels/tpu_compilation_cache_grpc.cc
index c3aa62805c0..1b21ad8bc70 100644
--- a/tensorflow/core/tpu/kernels/tpu_compilation_cache_grpc.cc
+++ b/tensorflow/core/tpu/kernels/tpu_compilation_cache_grpc.cc
@@ -66,9 +66,12 @@ grpc::TpuCompilationCacheService::Stub::Stub(
 grpc::TpuCompilationCacheService::Stub::AsyncGetTpuProgramRaw(
     ::grpc::ClientContext* context, const RequestType& request,
     ::grpc::CompletionQueue* cq) {
-  return ::grpc::internal::ClientAsyncResponseReaderFactory<
-      ResponseType>::Create(channel_.get(), cq, rpcmethod_get_tpu_program_,
-                            context, request, true);
+  ::grpc::ClientAsyncResponseReader<
+      grpc::TpuCompilationCacheService::ResponseType>* result =
+      ::grpc::internal::ClientAsyncResponseReaderHelper::Create<ResponseType>(
+          channel_.get(), cq, rpcmethod_get_tpu_program_, context, request);
+  result->StartCall();
+  return result;
 }
 
 ::grpc::ClientAsyncResponseReader<
@@ -76,9 +79,9 @@ grpc::TpuCompilationCacheService::Stub::AsyncGetTpuProgramRaw(
 grpc::TpuCompilationCacheService::Stub::PrepareAsyncGetTpuProgramRaw(
     ::grpc::ClientContext* context, const RequestType& request,
     ::grpc::CompletionQueue* cq) {
-  return ::grpc::internal::ClientAsyncResponseReaderFactory<
-      ResponseType>::Create(channel_.get(), cq, rpcmethod_get_tpu_program_,
-                            context, request, false);
+  return ::grpc::internal::ClientAsyncResponseReaderHelper::Create<
+      ResponseType>(channel_.get(), cq, rpcmethod_get_tpu_program_, context,
+                    request);
 }
 
 grpc::TpuCompilationCacheService::Service::Service() {

From 3ae790096cd98268474478c28480f3057e240b6b Mon Sep 17 00:00:00 2001
From: Peter Hawkins <phawkins@google.com>
Date: Mon, 14 Dec 2020 14:03:13 -0800
Subject: [PATCH 58/60] Add a test case for a larger QR decomposition.

PiperOrigin-RevId: 347464495
Change-Id: Id0b54d1cdfc88a544c1c698664580eebd2d9823e
---
 tensorflow/compiler/tests/qr_op_test.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tensorflow/compiler/tests/qr_op_test.py b/tensorflow/compiler/tests/qr_op_test.py
index d6d97fd3ad4..75fe7d6f8d4 100644
--- a/tensorflow/compiler/tests/qr_op_test.py
+++ b/tensorflow/compiler/tests/qr_op_test.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 import itertools
+import unittest
 
 from absl.testing import parameterized
 import numpy as np
@@ -129,6 +130,11 @@ class QrOpTest(xla_test.XLATestCase, parameterized.TestCase):
     x_np = self._random_matrix(np.float32, (2000, 2000))
     self._test(x_np, full_matrices=True)
 
+  @unittest.skip("Test times out on CI")
+  def testLarge17500x128(self):
+    x_np = self._random_matrix(np.float32, (17500, 128))
+    self._test(x_np, full_matrices=True)
+
   @parameterized.parameters((23, 25), (513, 23))
   def testZeroColumn(self, rows, cols):
     x_np = self._random_matrix(np.complex64, (rows, cols))

From e7e9a0c449803648d5103a39b868ee3abed31bdf Mon Sep 17 00:00:00 2001
From: Qiao Zhang <zhangqiaorjc@google.com>
Date: Mon, 14 Dec 2020 15:10:33 -0800
Subject: [PATCH 59/60] Move CreateModuleConfig to a new hlo_module_util
 header.

Move CreateExecutionOptions to xla/client/executable_build_options.h.

PiperOrigin-RevId: 347478735
Change-Id: Iab07bcd31e89ee8e24f2385d0a515dba03e505ad
---
 tensorflow/compiler/xla/client/BUILD          |   1 +
 .../xla/client/executable_build_options.cc    |  31 +++++
 .../xla/client/executable_build_options.h     |   6 +
 tensorflow/compiler/xla/service/BUILD         |  17 +++
 .../compiler/xla/service/hlo_module_util.cc   | 131 ++++++++++++++++++
 .../compiler/xla/service/hlo_module_util.h    |  44 ++++++
 .../compiler/xla/service/local_service.cc     |  31 +----
 tensorflow/compiler/xla/service/service.cc    |  85 +-----------
 8 files changed, 238 insertions(+), 108 deletions(-)
 create mode 100644 tensorflow/compiler/xla/service/hlo_module_util.cc
 create mode 100644 tensorflow/compiler/xla/service/hlo_module_util.h

diff --git a/tensorflow/compiler/xla/client/BUILD b/tensorflow/compiler/xla/client/BUILD
index 171afa42351..6baeca85149 100644
--- a/tensorflow/compiler/xla/client/BUILD
+++ b/tensorflow/compiler/xla/client/BUILD
@@ -95,6 +95,7 @@ cc_library(
     hdrs = ["executable_build_options.h"],
     deps = [
         "//tensorflow/compiler/xla:debug_options_flags",
+        "//tensorflow/compiler/xla:execution_options_util",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto_cc",
diff --git a/tensorflow/compiler/xla/client/executable_build_options.cc b/tensorflow/compiler/xla/client/executable_build_options.cc
index f39a3e79fe5..647232300e2 100644
--- a/tensorflow/compiler/xla/client/executable_build_options.cc
+++ b/tensorflow/compiler/xla/client/executable_build_options.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include "absl/strings/str_format.h"
 #include "tensorflow/compiler/xla/debug_options_flags.h"
+#include "tensorflow/compiler/xla/execution_options_util.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 
 namespace xla {
@@ -99,4 +100,34 @@ string ExecutableBuildOptions::ToString() const {
       device_ordinal_, result_layout, num_replicas_);
 }
 
+ExecutionOptions CreateExecutionOptions(
+    const ExecutableBuildOptions& build_options,
+    const ProgramShape* program_shape) {
+  ExecutionOptions options = CreateDefaultExecutionOptions();
+  if (build_options.has_debug_options()) {
+    *options.mutable_debug_options() = build_options.debug_options();
+  }
+  // Prefer the caller's result layout; otherwise use the default layout of
+  // the program's result shape.
+  auto* output_shape = options.mutable_shape_with_output_layout();
+  if (const auto* requested_layout = build_options.result_layout()) {
+    *output_shape = requested_layout->ToProto();
+  } else {
+    Shape default_shape(program_shape->result());
+    LayoutUtil::SetToDefaultLayout(&default_shape);
+    *output_shape = default_shape.ToProto();
+  }
+  options.set_num_replicas(build_options.num_replicas());
+  options.set_num_partitions(build_options.num_partitions());
+  options.set_use_spmd_partitioning(build_options.use_spmd_partitioning());
+  options.set_deduplicate_hlo(build_options.deduplicate_hlo());
+  options.set_alias_passthrough_params(
+      build_options.alias_passthrough_params());
+  if (build_options.has_device_assignment()) {
+    TF_CHECK_OK(build_options.device_assignment().Serialize(
+        options.mutable_device_assignment()));
+  }
+  return options;
+}
+
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/client/executable_build_options.h b/tensorflow/compiler/xla/client/executable_build_options.h
index c55f5750da7..104a49c3ce4 100644
--- a/tensorflow/compiler/xla/client/executable_build_options.h
+++ b/tensorflow/compiler/xla/client/executable_build_options.h
@@ -141,6 +141,12 @@ class ExecutableBuildOptions {
   tensorflow::thread::ThreadPool* compile_thread_pool_ = nullptr;
 };
 
+// Creates an ExecutionOptions based on a given ExecutableBuildOptions and
+// ProgramShape.
+ExecutionOptions CreateExecutionOptions(
+    const ExecutableBuildOptions& build_options,
+    const ProgramShape* program_shape);
+
 }  // namespace xla
 
 #endif  // TENSORFLOW_COMPILER_XLA_CLIENT_EXECUTABLE_BUILD_OPTIONS_H_
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index 36982700d5e..0a0c31908af 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -936,6 +936,7 @@ cc_library(
         ":hlo_evaluator",
         ":hlo_execution_profile",
         ":hlo_module_config",
+        ":hlo_module_util",
         ":hlo_proto_util",
         ":platform_util",
         ":source_map_util",
@@ -977,6 +978,7 @@ cc_library(
         ":hlo",
         ":hlo_execution_profile",
         ":hlo_module_config",
+        ":hlo_module_util",
         ":platform_util",
         ":service",
         ":shaped_buffer",
@@ -1528,6 +1530,21 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "hlo_module_util",
+    srcs = ["hlo_module_util.cc"],
+    hdrs = ["hlo_module_util.h"],
+    deps = [
+        ":compiler",
+        ":hlo_module_config",
+        "//tensorflow/compiler/xla:shape_util",
+        "//tensorflow/compiler/xla:status",
+        "//tensorflow/compiler/xla:statusor",
+        "@com_google_absl//absl/types:optional",
+        "@com_google_absl//absl/types:span",
+    ],
+)
+
 cc_library(
     name = "hlo_module_group_util",
     srcs = ["hlo_module_group_util.cc"],
diff --git a/tensorflow/compiler/xla/service/hlo_module_util.cc b/tensorflow/compiler/xla/service/hlo_module_util.cc
new file mode 100644
index 00000000000..106c50c6e8a
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_module_util.cc
@@ -0,0 +1,131 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/hlo_module_util.h"
+
+#include "absl/types/span.h"
+#include "tensorflow/compiler/xla/service/compiler.h"
+#include "tensorflow/compiler/xla/service/hlo_module_config.h"
+#include "tensorflow/compiler/xla/shape.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/compiler/xla/status.h"
+#include "tensorflow/compiler/xla/statusor.h"
+
+namespace xla {
+
+namespace {
+
+Status ValidateResultShape(const Shape& client_shape,
+                           const Shape& result_shape) {
+  TF_RETURN_IF_ERROR(ShapeUtil::ValidateShapeWithOptionalLayout(client_shape));
+  if (ShapeUtil::Compatible(client_shape, result_shape)) {
+    return Status::OK();
+  }
+  return InvalidArgument(
+      "Shape used to set computation result layout %s is not compatible "
+      "with result shape %s",
+      ShapeUtil::HumanStringWithLayout(client_shape),
+      ShapeUtil::HumanString(result_shape));
+}
+}  // namespace
+
+StatusOr<std::unique_ptr<HloModuleConfig>> CreateModuleConfig(
+    const ProgramShape& program_shape,
+    absl::Span<const Shape* const> argument_shapes,
+    const ExecutionOptions* execution_options, int default_num_replicas,
+    absl::optional<int> num_threads, const AotCompilationOptions* aot_options) {
+  auto config = absl::make_unique<HloModuleConfig>(program_shape);
+  ComputationLayout* computation_layout =
+      config->mutable_entry_computation_layout();
+  const int64 argument_shapes_size = argument_shapes.size();
+  if (program_shape.parameters_size() != argument_shapes_size) {
+    return InvalidArgument("computation takes %d parameters, but %u given",
+                           program_shape.parameters_size(),
+                           argument_shapes.size());
+  }
+  for (int i = 0, end = argument_shapes.size(); i < end; ++i) {
+    // Each argument must be compatible with its ProgramShape parameter.
+    if (!ShapeUtil::Compatible(*argument_shapes[i],
+                               program_shape.parameters(i))) {
+      return InvalidArgument(
+          "Argument does not match shape of computation parameter %d: want "
+          "%s, got %s",
+          i, ShapeUtil::HumanString(program_shape.parameters(i)),
+          ShapeUtil::HumanString(*argument_shapes[i]));
+    }
+    TF_RETURN_IF_ERROR(
+        computation_layout->mutable_parameter_layout(i)->CopyLayoutFromShape(
+            *argument_shapes[i]));
+  }
+  if (execution_options != nullptr &&
+      execution_options->has_shape_with_output_layout()) {
+    const Shape shape_with_output_layout(
+        execution_options->shape_with_output_layout());
+    TF_RETURN_IF_ERROR(
+        ValidateResultShape(shape_with_output_layout, program_shape.result()));
+    TF_RETURN_IF_ERROR(
+        computation_layout->mutable_result_layout()->CopyLayoutFromShape(
+            shape_with_output_layout));
+  } else {
+    // If the result layout is not set, then choose the default.
+    computation_layout->mutable_result_layout()->SetToDefaultLayout();
+  }
+
+  if (execution_options != nullptr) {
+    if (execution_options->num_replicas() > 0) {
+      config->set_replica_count(execution_options->num_replicas());
+    } else {
+      config->set_replica_count(default_num_replicas);
+    }
+    if (execution_options->num_partitions() > 0) {
+      config->set_num_partitions(execution_options->num_partitions());
+    }
+    config->set_use_spmd_partitioning(
+        execution_options->use_spmd_partitioning());
+    config->set_deduplicate_hlo(execution_options->deduplicate_hlo());
+    config->set_seed(execution_options->seed());
+    config->set_launch_id(execution_options->launch_id());
+    config->set_debug_options(execution_options->debug_options());
+    // Read under the null check: execution_options is allowed to be nullptr.
+    config->set_alias_passthrough_params(
+        execution_options->alias_passthrough_params());
+  } else {
+    config->set_replica_count(default_num_replicas);
+    config->set_debug_options(GetDebugOptionsFromFlags());
+  }
+
+  if (num_threads.has_value()) {
+    config->set_intra_op_parallelism_threads(*num_threads);
+  }
+
+  if (execution_options != nullptr &&
+      execution_options->has_device_assignment()) {
+    TF_ASSIGN_OR_RETURN(
+        auto device_assignment,
+        DeviceAssignment::Deserialize(execution_options->device_assignment()));
+    config->set_static_device_assignment(*device_assignment);
+  }
+
+  if (aot_options != nullptr &&
+      aot_options->fusion_config_collection() != FusionConfigCollection::kOff) {
+    config->set_fusion_config_collection(
+        aot_options->fusion_config_collection());
+    *config->mutable_fusion_config() = aot_options->fusion_config();
+  }
+
+  return std::move(config);
+}
+
+}  // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_module_util.h b/tensorflow/compiler/xla/service/hlo_module_util.h
new file mode 100644
index 00000000000..93d11eae5e6
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_module_util.h
@@ -0,0 +1,44 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_MODULE_UTIL_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_MODULE_UTIL_H_
+
+#include <memory>
+
+#include "absl/types/optional.h"
+#include "absl/types/span.h"
+#include "tensorflow/compiler/xla/service/compiler.h"
+#include "tensorflow/compiler/xla/service/hlo_module_config.h"
+#include "tensorflow/compiler/xla/shape.h"
+#include "tensorflow/compiler/xla/status.h"
+#include "tensorflow/compiler/xla/statusor.h"
+
+namespace xla {
+
+// Creates an HloModuleConfig for a given program shape and arguments.
+// If execution_options does not set num_replicas, default_num_replicas is used.
+// num_threads is optional; if absent, intra_op_parallelism_threads is not set.
+// aot_options is optional; if null, the fusion config is left untouched.
+StatusOr<std::unique_ptr<HloModuleConfig>> CreateModuleConfig(
+    const ProgramShape& program_shape,
+    absl::Span<const Shape* const> argument_shapes,
+    const ExecutionOptions* execution_options, int default_num_replicas,
+    absl::optional<int> num_threads = absl::nullopt,
+    const AotCompilationOptions* aot_options = nullptr);
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_MODULE_UTIL_H_
diff --git a/tensorflow/compiler/xla/service/local_service.cc b/tensorflow/compiler/xla/service/local_service.cc
index ea8c45d3d46..e18767523bb 100644
--- a/tensorflow/compiler/xla/service/local_service.cc
+++ b/tensorflow/compiler/xla/service/local_service.cc
@@ -32,6 +32,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_execution_profile.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_module_config.h"
+#include "tensorflow/compiler/xla/service/hlo_module_util.h"
 #include "tensorflow/compiler/xla/service/platform_util.h"
 #include "tensorflow/compiler/xla/shape_layout.h"
 #include "tensorflow/compiler/xla/shape_util.h"
@@ -94,36 +95,6 @@ absl::optional<const OpMetadata*> ParameterMetadata(
   return absl::nullopt;
 }
 
-ExecutionOptions CreateExecutionOptions(
-    const ExecutableBuildOptions& build_options,
-    const ProgramShape* program_shape) {
-  ExecutionOptions execution_options = CreateDefaultExecutionOptions();
-  if (build_options.has_debug_options()) {
-    *execution_options.mutable_debug_options() = build_options.debug_options();
-  }
-  if (build_options.result_layout() != nullptr) {
-    *execution_options.mutable_shape_with_output_layout() =
-        build_options.result_layout()->ToProto();
-  } else {
-    Shape result_shape(program_shape->result());
-    LayoutUtil::SetToDefaultLayout(&result_shape);
-    *execution_options.mutable_shape_with_output_layout() =
-        result_shape.ToProto();
-  }
-  execution_options.set_num_replicas(build_options.num_replicas());
-  execution_options.set_num_partitions(build_options.num_partitions());
-  execution_options.set_use_spmd_partitioning(
-      build_options.use_spmd_partitioning());
-  execution_options.set_deduplicate_hlo(build_options.deduplicate_hlo());
-  if (build_options.has_device_assignment()) {
-    TF_CHECK_OK(build_options.device_assignment().Serialize(
-        execution_options.mutable_device_assignment()));
-  }
-  execution_options.set_alias_passthrough_params(
-      build_options.alias_passthrough_params());
-  return execution_options;
-}
-
 }  // namespace
 
 StatusOr<std::vector<std::unique_ptr<Executable>>>
diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc
index cf781b4fcdd..d915f5de4b3 100644
--- a/tensorflow/compiler/xla/service/service.cc
+++ b/tensorflow/compiler/xla/service/service.cc
@@ -38,6 +38,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
 #include "tensorflow/compiler/xla/service/hlo_module.h"
 #include "tensorflow/compiler/xla/service/hlo_module_config.h"
+#include "tensorflow/compiler/xla/service/hlo_module_util.h"
 #include "tensorflow/compiler/xla/service/hlo_proto_util.h"
 #include "tensorflow/compiler/xla/service/platform_util.h"
 #include "tensorflow/compiler/xla/service/source_map_util.h"
@@ -256,88 +257,16 @@ StatusOr<std::unique_ptr<HloModuleConfig>> Service::CreateModuleConfig(
     absl::Span<const Shape* const> argument_shapes,
     const ExecutionOptions* execution_options,
     const AotCompilationOptions* aot_options) {
-  auto config = absl::make_unique<HloModuleConfig>(program_shape);
-  ComputationLayout* computation_layout =
-      config->mutable_entry_computation_layout();
-  const int64 argument_shapes_size = argument_shapes.size();
-  if (program_shape.parameters_size() != argument_shapes_size) {
-    return InvalidArgument("computation takes %d parameters, but %u given",
-                           program_shape.parameters_size(),
-                           argument_shapes.size());
-  }
-  for (int i = 0, end = argument_shapes.size(); i < end; ++i) {
-    // Verify that shape of arguments matches the shape of the arguments in the
-    // ProgramShape.
-    if (!ShapeUtil::Compatible(*argument_shapes[i],
-                               program_shape.parameters(i))) {
-      return InvalidArgument(
-          "Argument does not match shape of computation parameter %d: want "
-          "%s, got %s",
-          i, ShapeUtil::HumanString(program_shape.parameters(i)),
-          ShapeUtil::HumanString(*argument_shapes[i]));
-    }
-    TF_RETURN_IF_ERROR(
-        computation_layout->mutable_parameter_layout(i)->CopyLayoutFromShape(
-            *argument_shapes[i]));
-  }
-  if (execution_options != nullptr &&
-      execution_options->has_shape_with_output_layout()) {
-    const Shape shape_with_output_layout(
-        execution_options->shape_with_output_layout());
-    TF_RETURN_IF_ERROR(
-        ValidateResultShape(shape_with_output_layout, program_shape.result()));
-    TF_RETURN_IF_ERROR(
-        computation_layout->mutable_result_layout()->CopyLayoutFromShape(
-            shape_with_output_layout));
-  } else {
-    // If the result layout is not set, then choose the default.
-    computation_layout->mutable_result_layout()->SetToDefaultLayout();
-  }
-
-  if (execution_options != nullptr) {
-    if (execution_options->num_replicas() > 0) {
-      config->set_replica_count(execution_options->num_replicas());
-    } else {
-      config->set_replica_count(options_.number_of_replicas());
-    }
-    if (execution_options->num_partitions() > 0) {
-      config->set_num_partitions(execution_options->num_partitions());
-    }
-    config->set_use_spmd_partitioning(
-        execution_options->use_spmd_partitioning());
-    config->set_deduplicate_hlo(execution_options->deduplicate_hlo());
-    config->set_seed(execution_options->seed());
-    config->set_launch_id(execution_options->launch_id());
-    config->set_debug_options(execution_options->debug_options());
-  } else {
-    config->set_replica_count(options_.number_of_replicas());
-    config->set_debug_options(GetDebugOptionsFromFlags());
-  }
-
+  int default_num_replicas = options_.number_of_replicas();
+  absl::optional<int> num_threads;
   if (execute_backend_ != nullptr &&
       execute_backend_->eigen_intra_op_thread_pool() != nullptr) {
-    config->set_intra_op_parallelism_threads(
-        execute_backend_->eigen_intra_op_thread_pool()->NumThreads());
+    num_threads = execute_backend_->eigen_intra_op_thread_pool()->NumThreads();
   }
 
-  if (execution_options != nullptr &&
-      execution_options->has_device_assignment()) {
-    TF_ASSIGN_OR_RETURN(
-        auto device_assignment,
-        DeviceAssignment::Deserialize(execution_options->device_assignment()));
-    config->set_static_device_assignment(*device_assignment);
-  }
-  config->set_alias_passthrough_params(
-      execution_options->alias_passthrough_params());
-
-  if (aot_options != nullptr &&
-      aot_options->fusion_config_collection() != FusionConfigCollection::kOff) {
-    config->set_fusion_config_collection(
-        aot_options->fusion_config_collection());
-    *config->mutable_fusion_config() = aot_options->fusion_config();
-  }
-
-  return std::move(config);
+  return xla::CreateModuleConfig(program_shape, argument_shapes,
+                                 execution_options, default_num_replicas,
+                                 num_threads, aot_options);
 }
 
 StatusOr<std::unique_ptr<HloModuleConfig>> Service::CreateModuleConfig(

From bd359e57d8c6a0fe843cdba40d8ec5ecb14cb28a Mon Sep 17 00:00:00 2001
From: Jean-Baptiste Lespiau <jblespiau@google.com>
Date: Mon, 14 Dec 2020 15:43:18 -0800
Subject: [PATCH 60/60] Move the parsing of arguments into the header file.

PiperOrigin-RevId: 347485123
Change-Id: I42ef0d10b20462c406bdc55ca82b365fd44ebdb9
---
 tensorflow/compiler/xla/python/jax_jit.cc | 141 +++++++++-------------
 tensorflow/compiler/xla/python/jax_jit.h  |  28 +++++
 2 files changed, 85 insertions(+), 84 deletions(-)

diff --git a/tensorflow/compiler/xla/python/jax_jit.cc b/tensorflow/compiler/xla/python/jax_jit.cc
index 72bbb57cb0d..c5a4fa53ab7 100644
--- a/tensorflow/compiler/xla/python/jax_jit.cc
+++ b/tensorflow/compiler/xla/python/jax_jit.cc
@@ -176,6 +176,62 @@ H AbslHashValue(H h, const CallSignature& s) {
   return h;
 }
 
+// Filters out static arguments, then flattens and concatenates the others
+// (dynamic positional and keyword arguments), filling `arguments` in place.
+void ParseArguments(const py::args& args, const py::kwargs& py_kwargs,
+                    absl::Span<int const> static_argnums,
+                    ParsedArgumentsAsBuffers& arguments) {
+  // Capacity hints; cap the static count so the unsigned math cannot wrap.
+  const size_t num_static = std::min(static_argnums.size(), args.size());
+  arguments.flat_dynamic_args.reserve(args.size() + py_kwargs.size() -
+                                      num_static);
+  arguments.signature.dynamic_positional_args_treedef.reserve(
+      args.size() - num_static);
+
+  // Positional arguments.
+  for (size_t i = 0; i < args.size(); ++i) {
+    if (std::find(static_argnums.begin(), static_argnums.end(), i) ==
+        static_argnums.end()) {
+      PyTreeDef pytree_def;
+      pytree_def.FlattenInto(args[i], arguments.flat_dynamic_args);
+      arguments.signature.dynamic_positional_args_treedef.push_back(pytree_def);
+    } else {
+      arguments.signature.static_args.emplace_back(
+          // borrow is mandatory here.
+          py::reinterpret_borrow<py::object>(args[i]));
+    }
+  }
+
+  // Keyword arguments: intern the keys, sort the entries (by name, as in the
+  // Python path, see also PyTreeDef::Flatten), then create the signatures.
+  // TODO(jblespiau): We should be able to sort the keys by interned-key
+  // pointers, but this requires the Python compilation to do the same.
+  using KwargItem = std::pair<py::handle, py::handle>;
+  std::vector<KwargItem> kwargs(py_kwargs.begin(), py_kwargs.end());
+  arguments.signature.keyword_args.resize(kwargs.size());
+  for (KwargItem& kwarg : kwargs) {
+    // Intern the key if needed; `keep_alive_objects` owns the new reference.
+    if (!PyUnicode_CHECK_INTERNED(kwarg.first.ptr())) {
+      PyObject* key = kwarg.first.ptr();
+      kwarg.first.inc_ref();
+      PyUnicode_InternInPlace(&key);
+      arguments.keep_alive_objects.push_back(
+          py::reinterpret_steal<py::object>(key));
+      kwarg.first = py::handle(key);
+    }
+  }
+
+  std::sort(kwargs.begin(), kwargs.end(),
+            [](const KwargItem& a, const KwargItem& b) {
+              return a.first < b.first;
+            });
+  for (size_t i = 0; i < kwargs.size(); ++i) {
+    arguments.signature.keyword_args[i].key = kwargs[i].first;
+    arguments.signature.keyword_args[i].value_treedef.FlattenInto(
+        kwargs[i].second, arguments.flat_dynamic_args);
+  }
+}
+
 namespace {
 const py::dtype* DtypeTo32BitDtype(const py::dtype& dtype) {
   static const auto* int64_dt = new py::dtype("int64");
@@ -501,87 +557,6 @@ CompiledFunction::~CompiledFunction() {
   }
 }
 
-namespace {
-
-// The resulting information of the parsing and conversion of the arguments.
-struct ParsedArgumentsAsBuffers {
-  // The call signature will be filled during 2 steps:
-  // - `FlattenArguments` will fill the static arguments and the pytree
-  //    structures
-  // - the shapes and dtypes are filled later, by `ParseAndTransferArguments`.
-  CallSignature signature;
-  // The concatenation of the dynamic positional arguments and the sorted
-  // keyword arguments. We do not need ownership, thus the py::handle.
-  // TODO(jblespiau): We do not need py::object here and py::handle suffice and
-  // will prevent any counter increment.
-  std::vector<py::object> flat_dynamic_args;
-  std::vector<py::object> keep_alive_objects;
-
-  // The following is only valid if the parsing succeeds.
-  std::vector<xla::PjRtBuffer*> arg_buffers;
-  // We may need to keep these objects around, because:
-  // (a) we need to extend the lifetime of objects created within
-  //    `ConvertArgsToBuffers`
-  // (b) `arg_buffers` do not maintain ownership
-  std::vector<std::unique_ptr<xla::PjRtBuffer>> keep_alive;
-};
-
-// Filter out static arguments, flatten and concatenate other arguments (i.e.
-// dynamic positional and keyword arguments), filling `arguments` in place.
-void FlattenArguments(const py::args& args, const py::kwargs& py_kwargs,
-                      absl::Span<int const> static_argnums,
-                      ParsedArgumentsAsBuffers& arguments) {
-  arguments.flat_dynamic_args.reserve(args.size() + py_kwargs.size() -
-                                      static_argnums.size());
-  arguments.signature.dynamic_positional_args_treedef.reserve(
-      args.size() - static_argnums.size());
-
-  // Positional arguments.
-  for (size_t i = 0; i < args.size(); ++i) {
-    if (std::find(static_argnums.begin(), static_argnums.end(), i) ==
-        static_argnums.end()) {
-      PyTreeDef pytree_def;
-      pytree_def.FlattenInto(args[i], arguments.flat_dynamic_args);
-      arguments.signature.dynamic_positional_args_treedef.push_back(pytree_def);
-    } else {
-      arguments.signature.static_args.emplace_back(
-          // borrow is mandatory here.
-          py::reinterpret_borrow<py::object>(args[i]));
-    }
-  }
-
-  // Keyword arguments.
-  std::vector<std::pair<py::handle, py::handle>> kwargs(py_kwargs.begin(),
-                                                        py_kwargs.end());
-  // We first intern the keys, then sort them (by name, as in the Python path)
-  // (see also PyTreeDef::Flatten) and then create the signatures.
-  // TODO(jblespiau): We should be able to sort the keys by interned-key
-  // pointers, but this requires the Python compilation to do the same.
-  arguments.signature.keyword_args.resize(kwargs.size());
-  for (size_t i = 0; i < kwargs.size(); ++i) {
-    // Intern the key if not already interned.
-    if (!PyUnicode_CHECK_INTERNED(kwargs[i].first.ptr())) {
-      PyObject* key = kwargs[i].first.ptr();
-      kwargs[i].first.inc_ref();
-      PyUnicode_InternInPlace(&key);
-      arguments.keep_alive_objects.push_back(
-          py::reinterpret_steal<py::object>(key));
-      kwargs[i].first = py::handle(key);
-    }
-  }
-
-  std::sort(kwargs.begin(), kwargs.end(),
-            [](const std::pair<py::handle, py::handle>& a,
-               const std::pair<py::handle, py::handle>& b) {
-              return a.first < b.first;
-            });
-  for (size_t i = 0; i < kwargs.size(); ++i) {
-    arguments.signature.keyword_args[i].key = kwargs[i].first;
-    arguments.signature.keyword_args[i].value_treedef.FlattenInto(
-        kwargs[i].second, arguments.flat_dynamic_args);
-  }
-}
-
 // Converts flattened arguments contained in ParsedArgumentsAsBuffers in
 // place. If arguments are `DeviceArray`, they must all be on the same `Device`.
 //
@@ -669,8 +644,6 @@ Status ConvertArgsToBuffers(bool jax_enable_x64, xla::PyClient& pyclient,
   return Status::OK();
 }
 
-}  // namespace
-
 CacheEntry* CompiledFunction::GetCacheEntryIfPresent(
     const CallSignature& signature) {
   auto found_iterator = executables_.find(signature);
@@ -791,7 +764,7 @@ py::object CompiledFunction::Call(py::args args, py::kwargs kwargs) {
     return fun_(*args, **kwargs);
   }
   ParsedArgumentsAsBuffers arguments;
-  FlattenArguments(args, kwargs, static_argnums_, arguments);
+  ParseArguments(args, kwargs, static_argnums_, arguments);
 
   // The C++ jit do not support Tracers arguments inputs yet. The Python-based
   // jit function will be called if any of the dynamic arguments is unsupported.
diff --git a/tensorflow/compiler/xla/python/jax_jit.h b/tensorflow/compiler/xla/python/jax_jit.h
index c61522ff686..11855a668c2 100644
--- a/tensorflow/compiler/xla/python/jax_jit.h
+++ b/tensorflow/compiler/xla/python/jax_jit.h
@@ -109,6 +109,34 @@ H AbslHashValue(H h, const CallSignature::KwargEntry& kw) {
 template <typename H>
 H AbslHashValue(H h, const CallSignature& s);
 
+// The resulting information of the parsing and conversion of the arguments.
+struct ParsedArgumentsAsBuffers {
+  // The call signature is filled in 2 steps:
+  // - `ParseArguments` fills the static arguments and the pytree structures;
+  // - the shapes and dtypes are filled later, by `ParseAndTransferArguments`
+  //   (NOTE(review): presumably now `ConvertArgsToBuffers` - confirm).
+  CallSignature signature;
+  // The flattened dynamic positional arguments, followed by the flattened
+  // values of the sorted keyword arguments.
+  std::vector<pybind11::object> flat_dynamic_args;
+  std::vector<pybind11::object> keep_alive_objects;  // e.g. interned kwarg keys
+
+  // The following is only valid if the parsing succeeds.
+  std::vector<xla::PjRtBuffer*> arg_buffers;
+  // We may need to keep these objects around, because:
+  // (a) we need to extend the lifetime of objects created within
+  //    `ConvertArgsToBuffers`
+  // (b) `arg_buffers` does not maintain ownership
+  std::vector<std::unique_ptr<xla::PjRtBuffer>> keep_alive;
+};
+
+// Filters out static arguments, then flattens and concatenates the others
+// (dynamic positional and keyword arguments), filling `arguments` in place.
+void ParseArguments(const pybind11::args& args,
+                    const pybind11::kwargs& py_kwargs,
+                    absl::Span<int const> static_argnums,
+                    ParsedArgumentsAsBuffers& arguments);
+
 struct DevicePutResult {
   explicit DevicePutResult(PjRtBuffer* b, bool weak_type)
       : buffer(b), weak_type(weak_type), owned_buffer(nullptr) {}