From 552d6a22f66f6f2c7eb749ac3e79d83a9476b0e2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Wed, 30 Oct 2019 00:56:13 -0700
Subject: [PATCH] Merged commit includes the following changes: 277453541  by
 A. Unique TensorFlower<gardener@tensorflow.org>:

    Fix import path.

--
277445856  by A. Unique TensorFlower<gardener@tensorflow.org>:

    Separate `update_bazel_(linux|macos)` call from `kokoro_init_(linux|macos)` function. Modify all build scripts to run `update_bazel_*` right after `kokoro_init_*` call.

--
277440435  by A. Unique TensorFlower<gardener@tensorflow.org>:
    Automated rollback of changelist 277217247.

277434196  by A. Unique TensorFlower<gardener@tensorflow.org>:

    TFLM: Move runtime tensor initialization upfront and allow tensor_info to be freed.

    This CL will save `sizeof(TensorInfo) * tensors_size` bytes in tensor_arena.

    The cascaded design of SimpleMemoryAllocator provides a safe and intuitive abstraction. The child allocator serves as a temporary allocator. Whatever is allocated in the child will be freed once it goes out of scope. And it does not affect allocation in the parent - the parent allocator is locked down while the child allocator is available.

--
277428110  by A. Unique TensorFlower<gardener@tensorflow.org>:
    Automated rollback of changelist 276610559.

277426570  by A. Unique TensorFlower<gardener@tensorflow.org>:

    Exit gracefully when outside compilation is invoked outside TPUReplicateContext
    scope.

--
277423900  by A. Unique TensorFlower<gardener@tensorflow.org>:

    Tensor tracer: adding ShapeN to list of ops that should not be traced.

--
277422316  by A. Unique TensorFlower<gardener@tensorflow.org>:

    [XLA] Don't try to inline dead instructions.

    Inlining previous instruction could make other call instructions
    dead. Don't inline those dead instructions.

--
277414458  by A. Unique TensorFlower<gardener@tensorflow.org>:

    Op documentation update.
    	update of g3doc/tfl_ops.md
    	update of g3doc/tf_ops.md

--

PiperOrigin-RevId: 277453541
---
 tensorflow/compiler/mlir/g3doc/tf_ops.md      | 3787 ++++++++++++++---
 tensorflow/compiler/mlir/g3doc/tfl_ops.md     | 1720 +++++---
 tensorflow/compiler/mlir/xla/BUILD            |   11 +
 .../compiler/mlir/xla/convert_op_folder.cc    |   84 +
 .../compiler/mlir/xla/convert_op_folder.h     |   31 +
 tensorflow/compiler/mlir/xla/hlo_utils.cc     |   59 +
 tensorflow/compiler/mlir/xla/hlo_utils.h      |    5 +
 tensorflow/compiler/mlir/xla/ir/hlo_ops.cc    |   61 +-
 tensorflow/compiler/mlir/xla/ir/hlo_ops.td    |   14 +
 .../compiler/mlir/xla/ir/hlo_ops_base.td      |   16 +
 .../compiler/mlir/xla/mlir_hlo_to_hlo.cc      |    4 +
 .../compiler/mlir/xla/tests/legalize-tf.mlir  |   12 +
 .../mlir/xla/transforms/legalize_tf.cc        |    1 +
 .../xla/transforms/legalize_tf_patterns.td    |   12 +
 .../compiler/xla/service/call_inliner.cc      |    7 +-
 .../compiler/xla/service/call_inliner_test.cc |   40 +
 .../experimental/micro/micro_allocator.cc     |   44 +-
 .../micro/simple_memory_allocator.cc          |   21 +
 .../micro/simple_memory_allocator.h           |   19 +-
 .../micro/simple_memory_allocator_test.cc     |   28 +
 tensorflow/lite/kernels/add.cc                |   16 +-
 tensorflow/lite/kernels/add_test.cc           |   88 +
 .../internal/optimized/integer_ops/add.h      |   18 +
 .../internal/optimized/integer_ops/mul.h      |   18 +
 .../internal/optimized/optimized_ops.h        |   28 +
 tensorflow/lite/kernels/mul.cc                |    6 +-
 tensorflow/lite/kernels/mul_test.cc           |  111 +
 tensorflow/python/tpu/tensor_tracer.py        |    2 +-
 tensorflow/python/tpu/tpu.py                  |   10 +-
 tensorflow/tools/ci_build/release/common.sh   |    6 +-
 30 files changed, 5084 insertions(+), 1195 deletions(-)
 create mode 100644 tensorflow/compiler/mlir/xla/convert_op_folder.cc
 create mode 100644 tensorflow/compiler/mlir/xla/convert_op_folder.h

diff --git a/tensorflow/compiler/mlir/g3doc/tf_ops.md b/tensorflow/compiler/mlir/g3doc/tf_ops.md
index 8fd22a72a14..c7074c054d8 100644
--- a/tensorflow/compiler/mlir/g3doc/tf_ops.md
+++ b/tensorflow/compiler/mlir/g3doc/tf_ops.md
@@ -1,95 +1,290 @@
 <!-- Autogenerated by mlir-tblgen; don't manually edit -->
-# Operation definition
-## tf.Abs (TF::AbsOp)
+# Dialect 'tf' definition
+
+The TensorFlow dialect.
+
+This dialect maps to TensorFlow operations.
+
+Invariants:
+
+* All values are of Tensor type (in particular, scalars are
+  represented using zero-dimensional tensors);
+
+TODO: Make invariants more structured so that we can reference them in ops.
+
+[TOC]
+
+## Operation definition
+### tf.Abs (TF::AbsOp)
 Computes the absolute value of a tensor.
 
-### Description:
+#### Description:
 
 Given a tensor `x`, this operation returns a tensor containing the absolute
 value of each element in `x`. For example, if x is an input element and y is
 an output element, this operation computes \\(y = |x|\\).
 
-### Operands:
-1. `x`: tensor of floating-point or 32/64-bit integer values
+#### Operands:
+1. `x`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `y`: tensor of floating-point or 32/64-bit integer values
+#### Results:
+1. `y`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer values
 
-## tf.AddN (TF::AddNOp)
+### tf.AddN (TF::AddNOp)
 Add all input tensors element wise.
 
-### Description:
+#### Description:
 
+Inputs must be of same size and shape.
 
-### Operands:
-1. `inputs`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex128 type or complex64 type or TensorFlow variant type values
+  ```python
+  x = [9, 7, 10]
+  tf.math.add_n(x) ==> 26
+  ```
 
-### Attributes:
+#### Operands:
+1. `inputs`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow qint32 type or TensorFlow qint8 type or TensorFlow quint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or TensorFlow uint8 type or TensorFlow variant type values
+
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `N` | `IntegerAttr` | 64-bit integer attribute whose minimal value is 1 attribute |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `sum`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex128 type or complex64 type or TensorFlow variant type values
+#### Results:
+1. `sum`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow qint32 type or TensorFlow qint8 type or TensorFlow quint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or TensorFlow uint8 type or TensorFlow variant type values
 
-## tf.Add (TF::AddOp)
+### tf.Add (TF::AddOp)
 Returns x + y element-wise.
 
-### Description:
+#### Description:
 
 *NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 
-### Operands:
-1. `x`: tensor of number or TensorFlow string type values
-1. `y`: tensor of number or TensorFlow string type values
+#### Operands:
+1. `x`: tensor of floating-point or 8/16/32/64-bit integer or 64/128-bit complex type or TensorFlow uint8 type or TensorFlow string type values
+1. `y`: tensor of floating-point or 8/16/32/64-bit integer or 64/128-bit complex type or TensorFlow uint8 type or TensorFlow string type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `z`: tensor of number or TensorFlow string type values
+#### Results:
+1. `z`: tensor of floating-point or 8/16/32/64-bit integer or 64/128-bit complex type or TensorFlow uint8 type or TensorFlow string type values
 
-## tf.AddV2 (TF::AddV2Op)
+### tf.AddV2 (TF::AddV2Op)
 Returns x + y element-wise.
 
-### Description:
+#### Description:
 
 *NOTE*: `Add` supports broadcasting. `AddN` does not. More about broadcasting
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 
-### Operands:
-1. `x`: tensor of number values
-1. `y`: tensor of number values
+#### Operands:
+1. `x`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow uint8 type values
+1. `y`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow uint8 type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `z`: tensor of number values
+#### Results:
+1. `z`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow uint8 type values
 
-## tf.AvgPool (TF::AvgPoolOp)
+### tf.All (TF::AllOp)
+
+Computes the "logical and" of elements across dimensions of a tensor.
+  
+
+#### Description:
+
+Reduces `input` along the dimensions given in `axis`. Unless
+`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
+`axis`. If `keep_dims` is true, the reduced dimensions are
+retained with length 1.
+
+#### Operands:
+1. `input`: tensor of 1-bit integer values
+1. `reduction_indices`: tensor of 32/64-bit integer values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `keep_dims` | `BoolAttr` | bool attribute attribute |
+| `Tidx` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of 1-bit integer values
+
+### tf.Any (TF::AnyOp)
+
+Computes the "logical or" of elements across dimensions of a tensor.
+  
+
+#### Description:
+
+Reduces `input` along the dimensions given in `axis`. Unless
+`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
+`axis`. If `keep_dims` is true, the reduced dimensions are
+retained with length 1.
+
+#### Operands:
+1. `input`: tensor of 1-bit integer values
+1. `reduction_indices`: tensor of 32/64-bit integer values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `keep_dims` | `BoolAttr` | bool attribute attribute |
+| `Tidx` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of 1-bit integer values
+
+### tf.ArgMax (TF::ArgMaxOp)
+
+Returns the index with the largest value across dimensions of a tensor.
+  
+
+#### Description:
+
+Note that in case of ties the identity of the return value is not guaranteed.
+
+Usage:
+  ```python
+  import tensorflow as tf
+  a = [1, 10, 26.9, 2.8, 166.32, 62.3]
+  b = tf.math.argmax(input = a)
+  c = tf.keras.backend.eval(b)
+  # c = 4
+  # here a[4] = 166.32 which is the largest element of a across axis 0
+  ```
+
+#### Operands:
+1. `input`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow qint32 type or TensorFlow qint8 type or TensorFlow quint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or TensorFlow uint8 type values
+1. `dimension`: tensor of 32/64-bit integer values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `T` | `Attribute` | derived attribute attribute |
+| `Tidx` | `Attribute` | derived attribute attribute |
+| `output_type` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of 32/64-bit integer values
+
+### tf.ArgMin (TF::ArgMinOp)
+
+Returns the index with the smallest value across dimensions of a tensor.
+  
+
+#### Description:
+
+Note that in case of ties the identity of the return value is not guaranteed.
+
+Usage:
+  ```python
+  import tensorflow as tf
+  a = [1, 10, 26.9, 2.8, 166.32, 62.3]
+  b = tf.math.argmin(input = a)
+  c = tf.keras.backend.eval(b)
+  # c = 0
+  # here a[0] = 1 which is the smallest element of a across axis 0
+  ```
+
+#### Operands:
+1. `input`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow qint32 type or TensorFlow qint8 type or TensorFlow quint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or TensorFlow uint8 type values
+1. `dimension`: tensor of 32/64-bit integer values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `T` | `Attribute` | derived attribute attribute |
+| `Tidx` | `Attribute` | derived attribute attribute |
+| `output_type` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of 32/64-bit integer values
+
+### tf.Assert (TF::AssertOp)
+Asserts that the given condition is true.
+
+#### Description:
+
+If `condition` evaluates to false, print the list of tensors in `data`.
+`summarize` determines how many entries of the tensors to print.
+
+#### Operands:
+1. `condition`: tensor of 1-bit integer values
+1. `data`: tensor of tf.dtype values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `summarize` | `IntegerAttr` | 64-bit integer attribute attribute |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+
+### tf.AssignAddVariableOp (TF::AssignAddVariableOp)
+Adds a value to the current value of a variable.
+
+#### Description:
+
+Any ReadVariableOp with a control dependency on this op is guaranteed to
+see the incremented value or a subsequent newer one.
+
+#### Operands:
+1. `resource`: tensor of TensorFlow resource type values
+1. `value`: tensor of tf.dtype values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `dtype` | `Attribute` | derived attribute attribute |
+
+#### Results:
+
+### tf.AssignVariableOp (TF::AssignVariableOp)
+Assigns a new value to a variable.
+
+#### Description:
+
+Any ReadVariableOp with a control dependency on this op is guaranteed to return
+this value or a subsequent newer value of the variable.
+
+#### Operands:
+1. `resource`: tensor of TensorFlow resource type values
+1. `value`: tensor of tf.dtype values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `dtype` | `Attribute` | derived attribute attribute |
+
+#### Results:
+
+### tf.AvgPool (TF::AvgPoolOp)
 Performs average pooling on the input.
 
-### Description:
+#### Description:
 
 Each entry in `output` is the mean of the corresponding size `ksize`
 window in `value`.
 
-### Operands:
+#### Operands:
 1. `value`: tensor of floating-point values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `ksize` | `ArrayAttr` | 64-bit integer array attribute with at least 4 elements attribute |
@@ -98,13 +293,93 @@ window in `value`.
 | `data_format` | `StringAttr` | 'NHWC' or 'NCHW' convnet data format attribute |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of floating-point values
 
-## tf.BatchToSpaceND (TF::BatchToSpaceNDOp)
+### tf.BatchMatMul (TF::BatchMatMulOp)
+Multiplies slices of two tensors in batches.
+
+#### Description:
+
+Multiplies all slices of `Tensor` `x` and `y` (each slice can be
+viewed as an element of a batch), and arranges the individual results
+in a single output tensor of the same batch size. Each of the
+individual slices can optionally be adjointed (to adjoint a matrix
+means to transpose and conjugate it) before multiplication by setting
+the `adj_x` or `adj_y` flag to `True`, which are by default `False`.
+
+The input tensors `x` and `y` are 2-D or higher with shape `[..., r_x, c_x]`
+and `[..., r_y, c_y]`.
+
+The output tensor is 2-D or higher with shape `[..., r_o, c_o]`, where:
+
+    r_o = c_x if adj_x else r_x
+    c_o = r_y if adj_y else c_y
+
+It is computed as:
+
+    output[..., :, :] = matrix(x[..., :, :]) * matrix(y[..., :, :])
+
+#### Operands:
+1. `x`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements values
+1. `y`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `adj_x` | `BoolAttr` | bool attribute attribute |
+| `adj_y` | `BoolAttr` | bool attribute attribute |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements values
+
+### tf.BatchMatMulV2 (TF::BatchMatMulV2Op)
+Multiplies slices of two tensors in batches.
+
+#### Description:
+
+Multiplies all slices of `Tensor` `x` and `y` (each slice can be
+viewed as an element of a batch), and arranges the individual results
+in a single output tensor of the same batch size. Each of the
+individual slices can optionally be adjointed (to adjoint a matrix
+means to transpose and conjugate it) before multiplication by setting
+the `adj_x` or `adj_y` flag to `True`, which are by default `False`.
+
+The input tensors `x` and `y` are 2-D or higher with shape `[..., r_x, c_x]`
+and `[..., r_y, c_y]`.
+
+The output tensor is 2-D or higher with shape `[..., r_o, c_o]`, where:
+
+    r_o = c_x if adj_x else r_x
+    c_o = r_y if adj_y else c_y
+
+It is computed as:
+
+    output[..., :, :] = matrix(x[..., :, :]) * matrix(y[..., :, :])
+
+*NOTE*: `BatchMatMulV2` supports broadcasting in the batch dimensions. More
+about broadcasting
+[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html).
+
+#### Operands:
+1. `x`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements values
+1. `y`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `adj_x` | `BoolAttr` | bool attribute attribute |
+| `adj_y` | `BoolAttr` | bool attribute attribute |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements values
+
+### tf.BatchToSpaceND (TF::BatchToSpaceNDOp)
 BatchToSpace for N-D tensors of type T.
 
-### Description:
+#### Description:
 
 This operation reshapes the "batch" dimension 0 into `M + 1` dimensions of shape
 `block_shape + [batch]`, interleaves these blocks back into the grid defined by
@@ -113,48 +388,71 @@ the input.  The spatial dimensions of this intermediate result are then
 optionally cropped according to `crops` to produce the output.  This is the
 reverse of SpaceToBatch.  See below for a precise description.
 
-### Operands:
+#### Operands:
 1. `input`: tensor of tf.dtype values
 1. `block_shape`: tensor of 32/64-bit integer values
 1. `crops`: tensor of 32/64-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 | `Tcrops` | `Attribute` | derived attribute attribute |
 | `Tblock_shape` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of tf.dtype values
 
-## tf.BiasAdd (TF::BiasAddOp)
-Adds `bias` to `value`.
+### tf.BiasAddGrad (TF::BiasAddGradOp)
 
-### Description:
+The backward operation for "BiasAdd" on the "bias" tensor.
+  
 
-This is a special case of `tf.add` where `bias` is restricted to be 1-D.
-Broadcasting is supported, so `value` may have any number of dimensions.
+#### Description:
 
-### Operands:
-1. `value`: tensor of number values
-1. `bias`: tensor of number values
+It accumulates all the values from out_backprop into the feature dimension.
+For NHWC data format, the feature dimension is the last. For NCHW data format,
+the feature dimension is the third-to-last.
 
-### Attributes:
+#### Operands:
+1. `out_backprop`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow qint32 type or TensorFlow qint8 type or TensorFlow quint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or TensorFlow uint8 type values
+
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `data_format` | `StringAttr` | 'NHWC' or 'NCHW' convnet data format attribute |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `output`: tensor of number values
+#### Results:
+1. `output`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow qint32 type or TensorFlow qint8 type or TensorFlow quint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or TensorFlow uint8 type values
 
-## tf.Bitcast (TF::BitcastOp)
+### tf.BiasAdd (TF::BiasAddOp)
+Adds `bias` to `value`.
+
+#### Description:
+
+This is a special case of `tf.add` where `bias` is restricted to be 1-D.
+Broadcasting is supported, so `value` may have any number of dimensions.
+
+#### Operands:
+1. `value`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow qint32 type or TensorFlow qint8 type or TensorFlow quint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or TensorFlow uint8 type values
+1. `bias`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow qint32 type or TensorFlow qint8 type or TensorFlow quint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or TensorFlow uint8 type values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `data_format` | `StringAttr` | 'NHWC' or 'NCHW' convnet data format attribute |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow qint32 type or TensorFlow qint8 type or TensorFlow quint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or TensorFlow uint8 type values
+
+### tf.Bitcast (TF::BitcastOp)
 
 Bitcasts a tensor from one type to another without copying data.
   
 
-### Description:
+#### Description:
 
 Given a tensor `input`, this operation returns a tensor that has the same buffer
 data as `input` with datatype `type`.
@@ -173,25 +471,22 @@ For example,
 
 Example 1:
 
-```python
 >>> a = [1., 2., 3.]
->>> equality_bitcast = tf.bitcast(a,tf.complex128)
-tensorflow.python.framework.errors_impl.InvalidArgumentError: Cannot bitcast from float to complex128: shape [3] [Op:Bitcast]
->>> equality_cast = tf.cast(a,tf.complex128)
+>>> equality_bitcast = tf.bitcast(a, tf.complex128)
+Traceback (most recent call last):
+...
+InvalidArgumentError: Cannot bitcast from 1 to 18 [Op:Bitcast]
+>>> equality_cast = tf.cast(a, tf.complex128)
 >>> print(equality_cast)
 tf.Tensor([1.+0.j 2.+0.j 3.+0.j], shape=(3,), dtype=complex128)
-```
 
 Example 2:
 
-```python
 >>> tf.bitcast(tf.constant(0xffffffff, dtype=tf.uint32), tf.uint8)
-<tf.Tensor: ... shape=(4,), dtype=uint8, numpy=array([255, 255, 255, 255], dtype=uint8)>
-```
+<tf.Tensor: shape=(4,), dtype=uint8, numpy=array([255, 255, 255, 255], dtype=uint8)>
 
 Example 3:
 
-```python
 >>> x = [1., 2., 3.]
 >>> y = [0., 2., 3.]
 >>> equality= tf.equal(x,y)
@@ -203,30 +498,51 @@ tf.Tensor([False True True], shape=(3,), dtype=bool)
 tf.Tensor([0. 1. 1.], shape=(3,), dtype=float32)
 >>> print(equality_bitcast)
 tf.Tensor(
-[[ 0 0 0 0]
- [ 0 0 128 63]
- [ 0 0 128 63]], shape=(3, 4), dtype=uint8)
-```
+    [[  0   0   0   0]
+     [  0   0 128  63]
+     [  0   0 128  63]], shape=(3, 4), dtype=uint8)
 
 *NOTE*: Bitcast is implemented as a low-level cast, so machines with different
 endian orderings will give different results.
 
-### Operands:
+#### Operands:
 1. `input`: tensor of number values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 | `type` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of number values
 
-## tf.BroadcastTo (TF::BroadcastToOp)
+### tf.BroadcastGradientArgs (TF::BroadcastGradientArgsOp)
+
+Return the reduction indices for computing gradients of s0 op s1 with broadcast.
+  
+
+#### Description:
+
+This is typically used by gradient computations for a broadcasting operation.
+
+#### Operands:
+1. `s0`: tensor of 32/64-bit integer values
+1. `s1`: tensor of 32/64-bit integer values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `r0`: tensor of 32/64-bit integer values
+1. `r1`: tensor of 32/64-bit integer values
+
+### tf.BroadcastTo (TF::BroadcastToOp)
 Broadcast an array for a compatible shape.
 
-### Description:
+#### Description:
 
 Broadcasting is the process of making arrays to have compatible shapes
 for arithmetic operations. Two shapes are compatible if for each
@@ -236,110 +552,184 @@ and works its way forward.
 
 For example,
 
-```python
 >>> x = tf.constant([1, 2, 3])
 >>> y = tf.broadcast_to(x, [3, 3])
->>> sess.run(y)
-array([[1, 2, 3],
-       [1, 2, 3],
-       [1, 2, 3]], dtype=int32)
-```
+>>> print(y)
+tf.Tensor(
+    [[1 2 3]
+     [1 2 3]
+     [1 2 3]], shape=(3, 3), dtype=int32)
 
 In the above example, the input Tensor with the shape of `[1, 3]`
 is broadcasted to output Tensor with shape of `[3, 3]`.
 
-### Operands:
+#### Operands:
 1. `input`: tensor of tf.dtype values
 1. `shape`: tensor of 32/64-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 | `Tidx` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of tf.dtype values
 
-## tf.Cast (TF::CastOp)
+### tf.Cast (TF::CastOp)
 Cast x of type SrcT to y of DstT.
 
-### Description:
+#### Description:
 
 
-### Operands:
+#### Operands:
 1. `x`: tensor of tf.dtype values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `Truncate` | `BoolAttr` | bool attribute attribute |
 | `SrcT` | `Attribute` | derived attribute attribute |
 | `DstT` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `y`: tensor of tf.dtype values
 
-## tf.Ceil (TF::CeilOp)
+### tf.Ceil (TF::CeilOp)
 Returns element-wise smallest integer not less than x.
 
-### Description:
+#### Description:
 
 
-### Operands:
+#### Operands:
 1. `x`: tensor of floating-point values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `y`: tensor of floating-point values
 
-## tf.Concat (TF::ConcatOp)
+### tf.CheckNumerics (TF::CheckNumericsOp)
+Checks a tensor for NaN and Inf values.
+
+#### Description:
+
+When run, reports an `InvalidArgument` error if `tensor` has any values
+that are not a number (NaN) or infinity (Inf). Otherwise, passes `tensor` as-is.
+
+#### Operands:
+1. `tensor`: tensor of floating-point values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `message` | `StringAttr` | string attribute attribute |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of floating-point values
+
+### tf.ComplexAbs (TF::ComplexAbsOp)
+Computes the complex absolute value of a tensor.
+
+#### Description:
+
+Given a tensor `x` of complex numbers, this operation returns a tensor of type
+`float` or `double` that is the absolute value of each element in `x`. All
+elements in `x` must be complex numbers of the form \\(a + bj\\). The absolute
+value is computed as \\( \sqrt{a^2 + b^2}\\).
+
+#### Operands:
+1. `x`: tensor of complex type with 64-bit float elements or complex type with 32-bit float elements values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `T` | `Attribute` | derived attribute attribute |
+| `Tout` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `y`: tensor of 32/64-bit float values
+
+### tf.Complex (TF::ComplexOp)
+Converts two real numbers to a complex number.
+
+#### Description:
+
+Given a tensor `real` representing the real part of a complex number, and a
+tensor `imag` representing the imaginary part of a complex number, this
+operation returns complex numbers elementwise of the form \\(a + bj\\), where
+*a* represents the `real` part and *b* represents the `imag` part.
+
+The input tensors `real` and `imag` must have the same shape.
+
+For example:
+
+```
+# tensor 'real' is [2.25, 3.25]
+# tensor `imag` is [4.75, 5.75]
+tf.complex(real, imag) ==> [[2.25 + 4.75j], [3.25 + 5.75j]]
+```
+
+#### Operands:
+1. `real`: tensor of 32/64-bit float values
+1. `imag`: tensor of 32/64-bit float values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `T` | `Attribute` | derived attribute attribute |
+| `Tout` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `out`: tensor of complex type with 64-bit float elements or complex type with 32-bit float elements values
+
+### tf.Concat (TF::ConcatOp)
 Concatenates tensors along one dimension.
 
-### Description:
+#### Description:
 
 
-### Operands:
+#### Operands:
 1. `concat_dim`: tensor of 32-bit integer values
 1. `values`: tensor of tf.dtype values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `N` | `IntegerAttr` | 64-bit integer attribute whose minimal value is 2 attribute |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of tf.dtype values
 
-## tf.ConcatV2 (TF::ConcatV2Op)
+### tf.ConcatV2 (TF::ConcatV2Op)
 Concatenates tensors along one dimension.
 
-### Description:
+#### Description:
 
 
-### Operands:
+#### Operands:
 1. `values`: tensor of tf.dtype values
 1. `axis`: tensor of 32/64-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `N` | `IntegerAttr` | 64-bit integer attribute whose minimal value is 2 attribute |
 | `T` | `Attribute` | derived attribute attribute |
 | `Tidx` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of tf.dtype values
 
-## tf.Conj (TF::ConjOp)
+### tf.Conj (TF::ConjOp)
 Returns the complex conjugate of a complex number.
 
-### Description:
+#### Description:
 
 Given a tensor `input` of complex numbers, this operation returns a tensor of
 complex numbers that are the complex conjugate of each element in `input`. The
@@ -355,40 +745,94 @@ For example:
 tf.conj(input) ==> [-2.25 - 4.75j, 3.25 - 5.75j]
 ```
 
-### Operands:
-1. `input`: tensor of complex128 type or complex64 type or TensorFlow variant type values
+#### Operands:
+1. `input`: tensor of complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow variant type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `output`: tensor of complex128 type or complex64 type or TensorFlow variant type values
+#### Results:
+1. `output`: tensor of complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow variant type values
 
-## tf.Const (TF::ConstOp)
+### tf.Const (TF::ConstOp)
 Constant tensor op
 
-### Description:
+#### Description:
 
 
-### Operands:
+#### Operands:
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `value` | `ElementsAttr` | constant vector/tensor attribute attribute |
 | `dtype` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of tf.dtype values
 
-## tf.Conv2D (TF::Conv2DOp)
+### tf.Conv2DBackpropFilter (TF::Conv2DBackpropFilterOp)
+
+Computes the gradients of convolution with respect to the filter.
+  
+
+#### Description:
+
+
+#### Operands:
+1. `input`: tensor of floating-point values
+1. `filter_sizes`: tensor of 32-bit integer values
+1. `out_backprop`: tensor of floating-point values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `strides` | `ArrayAttr` | 64-bit integer array attribute attribute |
+| `use_cudnn_on_gpu` | `BoolAttr` | bool attribute attribute |
+| `padding` | `StringAttr` | string attribute whose value is SAME, or VALID, or EXPLICIT attribute |
+| `explicit_paddings` | `ArrayAttr` | 64-bit integer array attribute attribute |
+| `data_format` | `StringAttr` | 'NHWC' or 'NCHW' convnet data format attribute |
+| `dilations` | `ArrayAttr` | 64-bit integer array attribute attribute |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of floating-point values
+
+### tf.Conv2DBackpropInput (TF::Conv2DBackpropInputOp)
+
+Computes the gradients of convolution with respect to the input.
+  
+
+#### Description:
+
+
+#### Operands:
+1. `input_sizes`: tensor of 32-bit integer values
+1. `filter`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer values
+1. `out_backprop`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `strides` | `ArrayAttr` | 64-bit integer array attribute attribute |
+| `use_cudnn_on_gpu` | `BoolAttr` | bool attribute attribute |
+| `padding` | `StringAttr` | string attribute whose value is SAME, or VALID, or EXPLICIT attribute |
+| `explicit_paddings` | `ArrayAttr` | 64-bit integer array attribute attribute |
+| `data_format` | `StringAttr` | 'NHWC' or 'NCHW' convnet data format attribute |
+| `dilations` | `ArrayAttr` | 64-bit integer array attribute attribute |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer values
+
+### tf.Conv2D (TF::Conv2DOp)
 
 Computes a 2-D convolution given 4-D `input` and `filter` tensors.
   
 
-### Description:
+#### Description:
 
 Given an input tensor of shape `[batch, in_height, in_width, in_channels]`
 and a filter / kernel tensor of shape
@@ -412,11 +856,11 @@ In detail, with the default NHWC format,
 Must have `strides[0] = strides[3] = 1`.  For the most common case of the same
 horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
 
-### Operands:
-1. `input`: tensor of floating-point values
-1. `filter`: tensor of floating-point values
+#### Operands:
+1. `input`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer values
+1. `filter`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `strides` | `ArrayAttr` | 64-bit integer array attribute attribute |
@@ -427,32 +871,202 @@ horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
 | `dilations` | `ArrayAttr` | 64-bit integer array attribute attribute |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
+1. `output`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer values
+
+### tf.Conv3D (TF::Conv3DOp)
+
+Computes a 3-D convolution given 5-D `input` and `filter` tensors.
+  
+
+#### Description:
+
+In signal processing, cross-correlation is a measure of similarity of
+two waveforms as a function of a time-lag applied to one of them. This
+is also known as a sliding dot product or sliding inner-product.
+
+Our Conv3D implements a form of cross-correlation.
+
+#### Operands:
+1. `input`: tensor of floating-point values
+1. `filter`: tensor of floating-point values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `strides` | `ArrayAttr` | 64-bit integer array attribute with at least 5 elements attribute |
+| `padding` | `StringAttr` | string attribute whose value is SAME, or VALID attribute |
+| `data_format` | `StringAttr` | string attribute whose value is NDHWC, or NCDHW attribute |
+| `dilations` | `ArrayAttr` | 64-bit integer array attribute attribute |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
 1. `output`: tensor of floating-point values
 
-## tf.Cos (TF::CosOp)
+### tf.Cos (TF::CosOp)
 Computes cos of x element-wise.
 
-### Description:
+#### Description:
 
+Given an input tensor, this function computes the cosine of every
+  element in the tensor. The input range is `(-inf, inf)` and the
+  output range is `[-1, 1]`. If the input is infinite (`-inf` or `inf`),
+  `nan` is returned.
 
-### Operands:
+  ```python
+  x = tf.constant([-float("inf"), -9, -0.5, 1, 1.2, 200, 10000, float("inf")])
+  tf.math.cos(x) ==> [nan -0.91113025 0.87758255 0.5403023 0.36235774 0.48718765 -0.95215535 nan]
+  ```
+
+#### Operands:
 1. `x`: tensor of floating-point or 64/128-bit complex type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `y`: tensor of floating-point or 64/128-bit complex type values
 
-## tf.DepthwiseConv2dNative (TF::DepthwiseConv2dNativeOp)
+### tf.CrossReplicaSum (TF::CrossReplicaSumOp)
+An Op to sum inputs across replicated TPU instances.
+
+#### Description:
+
+Each instance supplies its own input.
+
+For example, suppose there are 8 TPU instances: `[A, B, C, D, E, F, G, H]`.
+Passing group_assignment=`[[0,2,4,6],[1,3,5,7]]` sets `A, C, E, G` as group 0,
+and `B, D, F, H` as group 1. Thus we get the outputs:
+`[A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H]`.
+
+#### Operands:
+1. `input`: tensor of bfloat16 type or 32-bit float or 32-bit integer or TensorFlow uint32 type values
+1. `group_assignment`: tensor of 32-bit integer values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of bfloat16 type or 32-bit float or 32-bit integer or TensorFlow uint32 type values
+
+### tf.DepthToSpace (TF::DepthToSpaceOp)
+DepthToSpace for tensors of type T.
+
+#### Description:
+
+Rearranges data from depth into blocks of spatial data.
+This is the reverse transformation of SpaceToDepth. More specifically,
+this op outputs a copy of the input tensor where values from the `depth`
+dimension are moved in spatial blocks to the `height` and `width` dimensions.
+The attr `block_size` indicates the input block size and how the data is moved.
+
+  * Chunks of data of size `block_size * block_size` from depth are rearranged
+    into non-overlapping blocks of size `block_size x block_size`
+  * The width of the output tensor is `input_width * block_size`, whereas the
+    height is `input_height * block_size`.
+  * The Y, X coordinates within each block of the output image are determined
+    by the high order component of the input channel index.
+  * The depth of the input tensor must be divisible by
+    `block_size * block_size`.
+
+The `data_format` attr specifies the layout of the input and output tensors
+with the following options:
+  "NHWC": `[ batch, height, width, channels ]`
+  "NCHW": `[ batch, channels, height, width ]`
+  "NCHW_VECT_C":
+      `qint8 [ batch, channels / 4, height, width, 4 ]`
+
+It is useful to consider the operation as transforming a 6-D Tensor.
+e.g. for data_format = NHWC,
+     Each element in the input tensor can be specified via 6 coordinates,
+     ordered by decreasing memory layout significance as:
+     n,iY,iX,bY,bX,oC  (where n=batch index, iX, iY means X or Y coordinates
+                        within the input image, bX, bY means coordinates
+                        within the output block, oC means output channels).
+     The output would be the input transposed to the following layout:
+     n,iY,bY,iX,bX,oC
+
+This operation is useful for resizing the activations between convolutions
+(but keeping all data), e.g. instead of pooling. It is also useful for training
+purely convolutional models.
+
+For example, given an input of shape `[1, 1, 1, 4]`, data_format = "NHWC" and
+block_size = 2:
+
+```
+x = [[[[1, 2, 3, 4]]]]
+
+```
+
+This operation will output a tensor of shape `[1, 2, 2, 1]`:
+
+```
+   [[[[1], [2]],
+     [[3], [4]]]]
+```
+
+Here, the input has a batch of 1 and each batch element has shape `[1, 1, 4]`,
+the corresponding output will have 2x2 elements and will have a depth of
+1 channel (1 = `4 / (block_size * block_size)`).
+The output element shape is `[2, 2, 1]`.
+
+For an input tensor with larger depth, here of shape `[1, 1, 1, 12]`, e.g.
+
+```
+x = [[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]]
+```
+
+This operation, for block size of 2, will return the following tensor of shape
+`[1, 2, 2, 3]`
+
+```
+   [[[[1, 2, 3], [4, 5, 6]],
+     [[7, 8, 9], [10, 11, 12]]]]
+
+```
+
+Similarly, for the following input of shape `[1, 2, 2, 4]`, and a block size of 2:
+
+```
+x =  [[[[1, 2, 3, 4],
+       [5, 6, 7, 8]],
+      [[9, 10, 11, 12],
+       [13, 14, 15, 16]]]]
+```
+
+the operator will return the following tensor of shape `[1, 4, 4, 1]`:
+
+```
+x = [[[ [1],   [2],  [5],  [6]],
+      [ [3],   [4],  [7],  [8]],
+      [ [9],  [10], [13],  [14]],
+      [ [11], [12], [15],  [16]]]]
+
+```
+
+#### Operands:
+1. `input`: tensor of tf.dtype values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `block_size` | `IntegerAttr` | 64-bit integer attribute whose minimal value is 2 attribute |
+| `data_format` | `StringAttr` | string attribute whose value is NHWC, or NCHW, or NCHW_VECT_C attribute |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of tf.dtype values
+
+### tf.DepthwiseConv2dNative (TF::DepthwiseConv2dNativeOp)
 
 Computes a 2-D depthwise convolution given 4-D `input` and `filter` tensors.
   
 
-### Description:
+#### Description:
 
 Given an input tensor of shape `[batch, in_height, in_width, in_channels]`
 and a filter / kernel tensor of shape
@@ -473,11 +1087,11 @@ for k in 0..in_channels-1
 Must have `strides[0] = strides[3] = 1`.  For the most common case of the same
 horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
 
-### Operands:
+#### Operands:
 1. `input`: tensor of floating-point values
 1. `filter`: tensor of floating-point values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `strides` | `ArrayAttr` | 64-bit integer array attribute attribute |
@@ -486,54 +1100,79 @@ horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
 | `dilations` | `ArrayAttr` | 64-bit integer array attribute attribute |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of floating-point values
 
-## tf.Div (TF::DivOp)
+### tf.Div (TF::DivOp)
 Returns x / y element-wise.
 
-### Description:
+#### Description:
 
 *NOTE*: `Div` supports broadcasting. More about broadcasting
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 
-### Operands:
-1. `x`: tensor of number values
-1. `y`: tensor of number values
+#### Operands:
+1. `x`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow uint16 type or TensorFlow uint8 type values
+1. `y`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow uint16 type or TensorFlow uint8 type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `z`: tensor of number values
+#### Results:
+1. `z`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow uint16 type or TensorFlow uint8 type values
 
-## tf.Elu (TF::EluOp)
+### tf.Elu (TF::EluOp)
 
 Computes exponential linear: `exp(features) - 1` if < 0, `features` otherwise.
   
 
-### Description:
+#### Description:
 
 See [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)
 ](http://arxiv.org/abs/1511.07289)
 
-### Operands:
+#### Operands:
 1. `features`: tensor of floating-point values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `activations`: tensor of floating-point values
 
-## tf.Equal (TF::EqualOp)
+### tf.EmptyTensorList (TF::EmptyTensorListOp)
+Creates and returns an empty tensor list.
+
+#### Description:
+
+All list elements must be tensors of dtype element_dtype and shape compatible
+with element_shape.
+
+handle: an empty tensor list.
+element_dtype: the type of elements in the list.
+element_shape: a shape compatible with that of elements in the list.
+
+#### Operands:
+1. `element_shape`: tensor of 32/64-bit integer values
+1. `max_num_elements`: tensor of 32-bit integer values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `shape_type` | `Attribute` | derived attribute attribute |
+| `element_dtype` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `handle`: tensor of TensorFlow variant type values
+
+### tf.Equal (TF::EqualOp)
 Returns the truth value of (x == y) element-wise.
 
-### Description:
+#### Description:
 
 *NOTE*: `Equal` supports broadcasting. More about broadcasting
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
@@ -548,22 +1187,68 @@ y = tf.constant([2, 4])
 tf.math.equal(x, y) ==> array([True,  True])
 ```
 
-### Operands:
-1. `x`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 1-bit integer or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex128 type or complex64 type or TensorFlow string type values
-1. `y`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 1-bit integer or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex128 type or complex64 type or TensorFlow string type values
+#### Operands:
+1. `x`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 1-bit integer or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow qint32 type or TensorFlow qint8 type or TensorFlow quint8 type or TensorFlow string type or TensorFlow uint8 type values
+1. `y`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 1-bit integer or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow qint32 type or TensorFlow qint8 type or TensorFlow quint8 type or TensorFlow string type or TensorFlow uint8 type values
 
-### Attributes:
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `incompatible_shape_error` | `BoolAttr` | bool attribute attribute |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `z`: tensor of 1-bit integer values
+
+### tf.Exp (TF::ExpOp)
+
+Computes exponential of x element-wise.  \\(y = e^x\\).
+  
+
+#### Description:
+
+This function computes the exponential of every element in the input tensor.
+  i.e. `exp(x)` or `e^(x)`, where `x` is the input tensor.
+  `e` denotes Euler's number and is approximately equal to 2.718281.
+  Output is positive for any real input.
+
+  ```python
+  x = tf.constant(2.0)
+  tf.math.exp(x) ==> 7.389056
+
+  x = tf.constant([2.0, 8.0])
+  tf.math.exp(x) ==> array([7.389056, 2980.958], dtype=float32)
+  ```
+
+  For complex numbers, the exponential value is calculated as follows:
+
+  ```
+  e^(x+iy) = e^x * e^iy = e^x * (cos y + i sin y)
+  ```
+
+  Let's consider complex number 1+1j as an example.
+  e^1 * (cos 1 + i sin 1) = 2.7182818284590 * (0.54030230586+0.8414709848j)
+
+  ```python
+  x = tf.constant(1 + 1j)
+  tf.math.exp(x) ==> 1.4686939399158851+2.2873552871788423j
+  ```
+
+#### Operands:
+1. `x`: tensor of floating-point or 64/128-bit complex type values
+
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `z`: tensor of 1-bit integer values
+#### Results:
+1. `y`: tensor of floating-point or 64/128-bit complex type values
 
-## tf.ExpandDims (TF::ExpandDimsOp)
+### tf.ExpandDims (TF::ExpandDimsOp)
 Inserts a dimension of 1 into a tensor's shape.
 
-### Description:
+#### Description:
 
 Given a tensor `input`, this operation inserts a dimension of 1 at the
 dimension index `axis` of `input`'s shape. The dimension index `axis` starts at
@@ -596,25 +1281,25 @@ This operation requires that:
 This operation is related to `squeeze()`, which removes dimensions of
 size 1.
 
-### Operands:
+#### Operands:
 1. `input`: tensor of tf.dtype values
 1. `dim`: tensor of 32/64-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 | `Tdim` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of tf.dtype values
 
-## tf.FakeQuantWithMinMaxArgs (TF::FakeQuantWithMinMaxArgsOp)
+### tf.FakeQuantWithMinMaxArgs (TF::FakeQuantWithMinMaxArgsOp)
 
 Fake-quantize the 'inputs' tensor, type float to 'outputs' tensor of same type.
   
 
-### Description:
+#### Description:
 
 Attributes `[min; max]` define the clamping range for the `inputs` data.
 `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]`
@@ -633,10 +1318,10 @@ If `min <= 0 <= max`: `scale = (max - min) / (2^num_bits - 1) `,
 
 Quantization is called fake since the output is still in floating point.
 
-### Operands:
+#### Operands:
 1. `inputs`: tensor of 32-bit float values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `min` | `FloatAttr` | 32-bit float attribute attribute |
@@ -644,15 +1329,15 @@ Quantization is called fake since the output is still in floating point.
 | `num_bits` | `IntegerAttr` | 64-bit integer attribute attribute |
 | `narrow_range` | `BoolAttr` | bool attribute attribute |
 
-### Results:
+#### Results:
 1. `outputs`: tensor of 32-bit float values
 
-## tf.FakeQuantWithMinMaxVars (TF::FakeQuantWithMinMaxVarsOp)
+### tf.FakeQuantWithMinMaxVars (TF::FakeQuantWithMinMaxVarsOp)
 
 Fake-quantize the 'inputs' tensor of type float via global float scalars `min`
   
 
-### Description:
+#### Description:
 
 and `max` to 'outputs' tensor of same shape as `inputs`.
 
@@ -674,24 +1359,66 @@ If `min <= 0 <= max`: `scale = (max - min) / (2^num_bits - 1) `,
 This operation has a gradient and thus allows for training `min` and `max`
 values.
 
-### Operands:
+#### Operands:
 1. `inputs`: tensor of 32-bit float values
 1. `min`: tensor of 32-bit float values
 1. `max`: tensor of 32-bit float values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `num_bits` | `IntegerAttr` | 64-bit integer attribute attribute |
 | `narrow_range` | `BoolAttr` | bool attribute attribute |
 
-### Results:
+#### Results:
 1. `outputs`: tensor of 32-bit float values
 
-## tf.Fill (TF::FillOp)
+### tf.FakeQuantWithMinMaxVarsPerChannel (TF::FakeQuantWithMinMaxVarsPerChannelOp)
+
+Fake-quantize the 'inputs' tensor of type float and one of the shapes: `[d]`,
+  
+
+#### Description:
+
+`[b, d]` `[b, h, w, d]` via per-channel floats `min` and `max` of shape `[d]`
+to 'outputs' tensor of same shape as `inputs`.
+
+`[min; max]` define the clamping range for the `inputs` data.
+`inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]`
+when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and
+then de-quantized and output as floats in `[min; max]` interval.
+`num_bits` is the bitwidth of the quantization; between 2 and 16, inclusive.
+
+Before quantization, `min` and `max` values are adjusted with the following
+logic.
+It is suggested to have `min <= 0 <= max`. If `0` is not in the range of values,
+the behavior can be unexpected:
+If `0 < min < max`: `min_adj = 0` and `max_adj = max - min`.
+If `min < max < 0`: `min_adj = min - max` and `max_adj = 0`.
+If `min <= 0 <= max`: `scale = (max - min) / (2^num_bits - 1) `,
+`min_adj = scale * round(min / scale)` and `max_adj = max + min_adj - min`.
+
+This operation has a gradient and thus allows for training `min` and `max`
+values.
+
+#### Operands:
+1. `inputs`: tensor of 32-bit float values
+1. `min`: tensor of 32-bit float values
+1. `max`: tensor of 32-bit float values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `num_bits` | `IntegerAttr` | 64-bit integer attribute attribute |
+| `narrow_range` | `BoolAttr` | bool attribute attribute |
+
+#### Results:
+1. `outputs`: tensor of 32-bit float values
+
+### tf.Fill (TF::FillOp)
 Creates a tensor filled with a scalar value.
 
-### Description:
+#### Description:
 
 This operation creates a tensor of shape `dims` and fills it with `value`.
 
@@ -713,72 +1440,97 @@ fill([2, 3], 9) ==> [[9, 9, 9]
 *   Because `tf.fill` evaluates at graph runtime, it supports dynamic shapes
     based on other runtime Tensors, unlike `tf.constant`.
 
-### Operands:
+#### Operands:
 1. `dims`: tensor of 32/64-bit integer values
 1. `value`: tensor of tf.dtype values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 | `index_type` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of tf.dtype values
 
-## tf.FloorDiv (TF::FloorDivOp)
+### tf.FloorDiv (TF::FloorDivOp)
 Returns x // y element-wise.
 
-### Description:
+#### Description:
 
 *NOTE*: `FloorDiv` supports broadcasting. More about broadcasting
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 
-### Operands:
-1. `x`: tensor of number values
-1. `y`: tensor of number values
+#### Operands:
+1. `x`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow uint16 type or TensorFlow uint8 type values
+1. `y`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow uint16 type or TensorFlow uint8 type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `z`: tensor of number values
+#### Results:
+1. `z`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow uint16 type or TensorFlow uint8 type values
 
-## tf.Floor (TF::FloorOp)
+### tf.FloorMod (TF::FloorModOp)
+
+Returns element-wise remainder of division. When `x < 0` xor `y < 0` is
+  
+
+#### Description:
+
+true, this follows Python semantics in that the result here is consistent
+with a flooring divide. E.g. `floor(x / y) * y + mod(x, y) = x`.
+
+*NOTE*: `FloorMod` supports broadcasting. More about broadcasting
+[here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
+
+#### Operands:
+1. `x`: tensor of floating-point or 32/64-bit integer values
+1. `y`: tensor of floating-point or 32/64-bit integer values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `z`: tensor of floating-point or 32/64-bit integer values
+
+### tf.Floor (TF::FloorOp)
 Returns element-wise largest integer not greater than x.
 
-### Description:
+#### Description:
 
 
-### Operands:
+#### Operands:
 1. `x`: tensor of floating-point values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `y`: tensor of floating-point values
 
-## tf.FusedBatchNorm (TF::FusedBatchNormOp)
+### tf.FusedBatchNorm (TF::FusedBatchNormOp)
 Batch normalization.
 
-### Description:
+#### Description:
 
 Note that the size of 4D Tensors are defined by either "NHWC" or "NCHW".
 The size of 1D Tensors matches the dimension C of the 4D Tensors.
 
-### Operands:
+#### Operands:
 1. `x`: tensor of 32-bit float values
 1. `scale`: tensor of 32-bit float values
 1. `offset`: tensor of 32-bit float values
 1. `mean`: tensor of 32-bit float values
 1. `variance`: tensor of 32-bit float values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `epsilon` | `FloatAttr` | 32-bit float attribute attribute |
@@ -786,17 +1538,174 @@ The size of 1D Tensors matches the dimension C of the 4D Tensors.
 | `is_training` | `BoolAttr` | bool attribute attribute |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `y`: tensor of 32-bit float values
 1. `batch_mean`: tensor of 32-bit float values
 1. `batch_variance`: tensor of 32-bit float values
 1. `reserve_space_1`: tensor of 32-bit float values
 1. `reserve_space_2`: tensor of 32-bit float values
 
-## tf.Gather (TF::GatherOp)
+### tf.FusedBatchNormV3 (TF::FusedBatchNormV3Op)
+Batch normalization.
+
+#### Description:
+
+Note that the sizes of 4D Tensors are defined by either "NHWC" or "NCHW".
+The size of 1D Tensors matches the dimension C of the 4D Tensors.
+
+#### Operands:
+1. `x`: tensor of bfloat16 type or 16-bit float or 32-bit float values
+1. `scale`: tensor of 32-bit float values
+1. `offset`: tensor of 32-bit float values
+1. `mean`: tensor of 32-bit float values
+1. `variance`: tensor of 32-bit float values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `epsilon` | `FloatAttr` | 32-bit float attribute attribute |
+| `data_format` | `StringAttr` | 'NHWC' or 'NCHW' convnet data format attribute |
+| `is_training` | `BoolAttr` | bool attribute attribute |
+| `T` | `Attribute` | derived attribute attribute |
+| `U` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `y`: tensor of bfloat16 type or 16-bit float or 32-bit float values
+1. `batch_mean`: tensor of 32-bit float values
+1. `batch_variance`: tensor of 32-bit float values
+1. `reserve_space_1`: tensor of 32-bit float values
+1. `reserve_space_2`: tensor of 32-bit float values
+1. `reserve_space_3`: tensor of 32-bit float values
+
+### tf.GatherNd (TF::GatherNdOp)
+
+Gather slices from `params` into a Tensor with shape specified by `indices`.
+  
+
+#### Description:
+
+`indices` is a K-dimensional integer tensor, best thought of as a
+(K-1)-dimensional tensor of indices into `params`, where each element defines a
+slice of `params`:
+
+    output[\\(i_0, ..., i_{K-2}\\)] = params[indices[\\(i_0, ..., i_{K-2}\\)]]
+
+Whereas in `tf.gather` `indices` defines slices into the `axis`
+dimension of `params`, in `tf.gather_nd`, `indices` defines slices into the
+first `N` dimensions of `params`, where `N = indices.shape[-1]`.
+
+The last dimension of `indices` can be at most the rank of
+`params`:
+
+    indices.shape[-1] <= params.rank
+
+The last dimension of `indices` corresponds to elements
+(if `indices.shape[-1] == params.rank`) or slices
+(if `indices.shape[-1] < params.rank`) along dimension `indices.shape[-1]`
+of `params`.  The output tensor has shape
+
+    indices.shape[:-1] + params.shape[indices.shape[-1]:]
+
+Note that on CPU, if an out of bound index is found, an error is returned.
+On GPU, if an out of bound index is found, a 0 is stored in the
+corresponding output value.
+
+Some examples below.
+
+Simple indexing into a matrix:
+
+```python
+    indices = [[0, 0], [1, 1]]
+    params = [['a', 'b'], ['c', 'd']]
+    output = ['a', 'd']
+```
+
+Slice indexing into a matrix:
+
+```python
+    indices = [[1], [0]]
+    params = [['a', 'b'], ['c', 'd']]
+    output = [['c', 'd'], ['a', 'b']]
+```
+
+Indexing into a 3-tensor:
+
+```python
+    indices = [[1]]
+    params = [[['a0', 'b0'], ['c0', 'd0']],
+              [['a1', 'b1'], ['c1', 'd1']]]
+    output = [[['a1', 'b1'], ['c1', 'd1']]]
+
+
+    indices = [[0, 1], [1, 0]]
+    params = [[['a0', 'b0'], ['c0', 'd0']],
+              [['a1', 'b1'], ['c1', 'd1']]]
+    output = [['c0', 'd0'], ['a1', 'b1']]
+
+
+    indices = [[0, 0, 1], [1, 0, 1]]
+    params = [[['a0', 'b0'], ['c0', 'd0']],
+              [['a1', 'b1'], ['c1', 'd1']]]
+    output = ['b0', 'b1']
+```
+
+Batched indexing into a matrix:
+
+```python
+    indices = [[[0, 0]], [[0, 1]]]
+    params = [['a', 'b'], ['c', 'd']]
+    output = [['a'], ['b']]
+```
+
+Batched slice indexing into a matrix:
+
+```python
+    indices = [[[1]], [[0]]]
+    params = [['a', 'b'], ['c', 'd']]
+    output = [[['c', 'd']], [['a', 'b']]]
+```
+
+Batched indexing into a 3-tensor:
+
+```python
+    indices = [[[1]], [[0]]]
+    params = [[['a0', 'b0'], ['c0', 'd0']],
+              [['a1', 'b1'], ['c1', 'd1']]]
+    output = [[[['a1', 'b1'], ['c1', 'd1']]],
+              [[['a0', 'b0'], ['c0', 'd0']]]]
+
+    indices = [[[0, 1], [1, 0]], [[0, 0], [1, 1]]]
+    params = [[['a0', 'b0'], ['c0', 'd0']],
+              [['a1', 'b1'], ['c1', 'd1']]]
+    output = [[['c0', 'd0'], ['a1', 'b1']],
+              [['a0', 'b0'], ['c1', 'd1']]]
+
+
+    indices = [[[0, 0, 1], [1, 0, 1]], [[0, 1, 1], [1, 1, 0]]]
+    params = [[['a0', 'b0'], ['c0', 'd0']],
+              [['a1', 'b1'], ['c1', 'd1']]]
+    output = [['b0', 'b1'], ['d0', 'c1']]
+```
+
+See also `tf.gather` and `tf.batch_gather`.
+
+#### Operands:
+1. `params`: tensor of tf.dtype values
+1. `indices`: tensor of 32/64-bit integer values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `Tindices` | `Attribute` | derived attribute attribute |
+| `Tparams` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of tf.dtype values
+
+### tf.Gather (TF::GatherOp)
 Gather slices from `params` according to `indices`.
 
-### Description:
+#### Description:
 
 `indices` must be an integer tensor of any dimension (usually 0-D or 1-D).
 Produces an output tensor with shape `indices.shape + params.shape[1:]` where:
@@ -824,26 +1733,26 @@ raising an error.
 <img style="width:100%" src="https://www.tensorflow.org/images/Gather.png" alt>
 </div>
 
-### Operands:
+#### Operands:
 1. `params`: tensor of tf.dtype values
 1. `indices`: tensor of 32/64-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `validate_indices` | `BoolAttr` | bool attribute attribute |
 | `Tindices` | `Attribute` | derived attribute attribute |
 | `Tparams` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of tf.dtype values
 
-## tf.GatherV2 (TF::GatherV2Op)
+### tf.GatherV2 (TF::GatherV2Op)
 
 Gather slices from `params` axis `axis` according to `indices`.
   
 
-### Description:
+#### Description:
 
 `indices` must be an integer tensor of any dimension (usually 0-D or 1-D).
 Produces an output tensor with shape `params.shape[:axis] + indices.shape +
@@ -873,12 +1782,12 @@ corresponding output value.
 
 See also `tf.batch_gather` and `tf.gather_nd`.
 
-### Operands:
+#### Operands:
 1. `params`: tensor of tf.dtype values
 1. `indices`: tensor of 32/64-bit integer values
 1. `axis`: tensor of 32/64-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `batch_dims` | `IntegerAttr` | 64-bit integer attribute attribute |
@@ -886,55 +1795,79 @@ See also `tf.batch_gather` and `tf.gather_nd`.
 | `Tparams` | `Attribute` | derived attribute attribute |
 | `Taxis` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of tf.dtype values
 
-## tf.GreaterEqual (TF::GreaterEqualOp)
+### tf.GreaterEqual (TF::GreaterEqualOp)
 Returns the truth value of (x >= y) element-wise.
 
-### Description:
+#### Description:
 
 *NOTE*: `GreaterEqual` supports broadcasting. More about broadcasting
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 
-### Operands:
-1. `x`: tensor of 8/16/32/64-bit integer or floating-point values
-1. `y`: tensor of 8/16/32/64-bit integer or floating-point values
+Example:
 
-### Attributes:
+```python
+x = tf.constant([5, 4, 6, 7])
+y = tf.constant([5, 2, 5, 10])
+tf.math.greater_equal(x, y) ==> [True, True, True, False]
+
+x = tf.constant([5, 4, 6, 7])
+y = tf.constant([5])
+tf.math.greater_equal(x, y) ==> [True, False, True, True]
+```
+
+#### Operands:
+1. `x`: tensor of 8/16/32/64-bit integer or TensorFlow uint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or floating-point values
+1. `y`: tensor of 8/16/32/64-bit integer or TensorFlow uint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or floating-point values
+
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `z`: tensor of 1-bit integer values
 
-## tf.Greater (TF::GreaterOp)
+### tf.Greater (TF::GreaterOp)
 Returns the truth value of (x > y) element-wise.
 
-### Description:
+#### Description:
 
 *NOTE*: `Greater` supports broadcasting. More about broadcasting
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 
-### Operands:
-1. `x`: tensor of 8/16/32/64-bit integer or floating-point values
-1. `y`: tensor of 8/16/32/64-bit integer or floating-point values
+Example:
 
-### Attributes:
+```python
+x = tf.constant([5, 4, 6])
+y = tf.constant([5, 2, 5])
+tf.math.greater(x, y) ==> [False, True, True]
+
+x = tf.constant([5, 4, 6])
+y = tf.constant([5])
+tf.math.greater(x, y) ==> [False, False, True]
+```
+
+#### Operands:
+1. `x`: tensor of 8/16/32/64-bit integer or TensorFlow uint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or floating-point values
+1. `y`: tensor of 8/16/32/64-bit integer or TensorFlow uint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or floating-point values
+
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `z`: tensor of 1-bit integer values
 
-## tf.IdentityN (TF::IdentityNOp)
+### tf.IdentityN (TF::IdentityNOp)
 
 Returns a list of tensors with the same shapes and contents as the input
   
 
-### Description:
+#### Description:
 
 tensors.
 
@@ -952,206 +1885,370 @@ def ApplyG(op, dy, _):
   return [None, g(dy)]  # Do not backprop to f(x).
 ```
 
-### Operands:
+#### Operands:
 1. `input`: tensor of tf.dtype values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of tf.dtype values
 
-## tf.Identity (TF::IdentityOp)
+### tf.Identity (TF::IdentityOp)
 Identity op
 
-### Description:
+#### Description:
 
 Returns a tensor with the same shape and contents as input.
 
-### Operands:
+#### Operands:
 1. `input`: tensor of tf.dtype values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of tf.dtype values
 
-## tf.Invert (TF::InvertOp)
+### tf.If (TF::IfOp)
+output = cond ? then_branch(input) : else_branch(input)
+
+#### Description:
+
+output = cond ? then_branch(input) : else_branch(input)
+
+cond: A Tensor. If the tensor is a scalar of non-boolean type, the
+    scalar is converted to a boolean according to the
+    following rule: if the scalar is a numerical value, non-zero means
+    True and zero means False; if the scalar is a string, non-empty
+    means True and empty means False. If the tensor is not a scalar,
+    being empty means False and being non-empty means True.
+input: A list of input tensors.
+then_branch: A function that takes 'inputs' and returns a list of
+    tensors, whose types are the same as what else_branch returns.
+else_branch: A function that takes 'inputs' and returns a list of
+    tensors, whose types are the same as what then_branch returns.
+
+#### Operands:
+1. `cond`: tensor of tf.dtype values
+1. `input`: tensor of tf.dtype values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `then_branch` | `SymbolRefAttr` | symbol reference attribute attribute |
+| `else_branch` | `SymbolRefAttr` | symbol reference attribute attribute |
+| `output_shapes` | `ArrayAttr` | string array attribute attribute |
+| `is_stateless` | `BoolAttr` | bool attribute attribute |
+| `Tcond` | `Attribute` | derived attribute attribute |
+| `Tin` | `Attribute` | derived attribute attribute |
+| `Tout` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of tf.dtype values
+
+### tf.Imag (TF::ImagOp)
+Returns the imaginary part of a complex number.
+
+#### Description:
+
+Given a tensor `input` of complex numbers, this operation returns a tensor of
+type `float` that is the imaginary part of each element in `input`. All
+elements in `input` must be complex numbers of the form \\(a + bj\\), where *a*
+is the real part and *b* is the imaginary part returned by this operation.
+
+For example:
+
+```
+# tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
+tf.imag(input) ==> [4.75, 5.75]
+```
+
+#### Operands:
+1. `input`: tensor of complex type with 64-bit float elements or complex type with 32-bit float elements values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `T` | `Attribute` | derived attribute attribute |
+| `Tout` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of 32/64-bit float values
+
+### tf.Invert (TF::InvertOp)
 
 Invert (flip) each bit of supported types; for example, type `uint8` value 01010101 becomes 10101010.
   
 
-### Description:
+#### Description:
 
 Flip each bit of supported types.  For example, type `int8` (decimal 2) binary 00000010 becomes (decimal -3) binary 11111101.
 This operation is performed on each element of the tensor argument `x`.
 
-### Operands:
-1. `x`: tensor of 8/16/32/64-bit integer values
+Example: ```python import tensorflow as tf
+from tensorflow.python.ops import bitwise_ops
 
-### Attributes:
+# flip 2 (00000010) to -3 (11111101)
+tf.assert_equal(-3, bitwise_ops.invert(2))
+
+dtype_list = [dtypes.int8, dtypes.int16, dtypes.int32, dtypes.int64,
+              dtypes.uint8, dtypes.uint16, dtypes.uint32, dtypes.uint64]
+
+inputs = [0, 5, 3, 14]
+for dtype in dtype_list:
+  # Because of issues with negative numbers, let's test this indirectly.
+  # 1. invert(a) and a = 0
+  # 2. invert(a) or a = invert(0)
+  input_tensor = tf.constant([0, 5, 3, 14], dtype=dtype)
+  not_a_and_a, not_a_or_a, not_0 = [bitwise_ops.bitwise_and(
+                                      input_tensor, bitwise_ops.invert(input_tensor)),
+                                    bitwise_ops.bitwise_or(
+                                      input_tensor, bitwise_ops.invert(input_tensor)),
+                                    bitwise_ops.invert(
+                                      tf.constant(0, dtype=dtype))]
+
+  expected = tf.constant([0, 0, 0, 0], dtype=tf.float32)
+  tf.assert_equal(tf.cast(not_a_and_a, tf.float32), expected)
+
+  expected = tf.cast([not_0] * 4, tf.float32)
+  tf.assert_equal(tf.cast(not_a_or_a, tf.float32), expected)
+
+  # For unsigned dtypes let's also check the result directly.
+  if dtype.is_unsigned:
+    inverted = bitwise_ops.invert(input_tensor)
+    expected = tf.constant([dtype.max - x for x in inputs], dtype=tf.float32)
+    tf.assert_equal(tf.cast(inverted, tf.float32), tf.cast(expected, tf.float32))
+```
+
+#### Operands:
+1. `x`: tensor of 8/16/32/64-bit integer or TensorFlow uint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type values
+
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `y`: tensor of 8/16/32/64-bit integer values
+#### Results:
+1. `y`: tensor of 8/16/32/64-bit integer or TensorFlow uint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type values
 
-## tf.LeakyRelu (TF::LeakyReluOp)
+### tf.LRN (TF::LRNOp)
+Local Response Normalization.
+
+#### Description:
+
+The 4-D `input` tensor is treated as a 3-D array of 1-D vectors (along the last
+dimension), and each vector is normalized independently.  Within a given vector,
+each component is divided by the weighted, squared sum of inputs within
+`depth_radius`.  In detail,
+
+    sqr_sum[a, b, c, d] =
+        sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2)
+    output = input / (bias + alpha * sqr_sum) ** beta
+
+For details, see [Krizhevsky et al., ImageNet classification with deep
+convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
+
+#### Operands:
+1. `input`: tensor of bfloat16 type or 16-bit float or 32-bit float values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `depth_radius` | `IntegerAttr` | 64-bit integer attribute attribute |
+| `bias` | `FloatAttr` | 32-bit float attribute attribute |
+| `alpha` | `FloatAttr` | 32-bit float attribute attribute |
+| `beta` | `FloatAttr` | 32-bit float attribute attribute |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of bfloat16 type or 16-bit float or 32-bit float values
+
+### tf.LeakyRelu (TF::LeakyReluOp)
 Computes rectified linear: `max(features, features * alpha)`.
 
-### Description:
+#### Description:
 
 
-### Operands:
+#### Operands:
 1. `features`: tensor of floating-point values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `alpha` | `FloatAttr` | 32-bit float attribute attribute |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `activations`: tensor of floating-point values
 
-## tf.LessEqual (TF::LessEqualOp)
+### tf.LessEqual (TF::LessEqualOp)
 Returns the truth value of (x <= y) element-wise.
 
-### Description:
+#### Description:
 
 *NOTE*: `LessEqual` supports broadcasting. More about broadcasting
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 
-### Operands:
-1. `x`: tensor of 8/16/32/64-bit integer or floating-point values
-1. `y`: tensor of 8/16/32/64-bit integer or floating-point values
+Example:
 
-### Attributes:
+```python
+x = tf.constant([5, 4, 6])
+y = tf.constant([5])
+tf.math.less_equal(x, y) ==> [True, True, False]
+
+x = tf.constant([5, 4, 6])
+y = tf.constant([5, 6, 6])
+tf.math.less_equal(x, y) ==> [True, True, True]
+```
+
+#### Operands:
+1. `x`: tensor of 8/16/32/64-bit integer or TensorFlow uint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or floating-point values
+1. `y`: tensor of 8/16/32/64-bit integer or TensorFlow uint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or floating-point values
+
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `z`: tensor of 1-bit integer values
 
-## tf.Less (TF::LessOp)
+### tf.Less (TF::LessOp)
 Returns the truth value of (x < y) element-wise.
 
-### Description:
+#### Description:
 
 *NOTE*: `Less` supports broadcasting. More about broadcasting
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 
-### Operands:
-1. `x`: tensor of 8/16/32/64-bit integer or floating-point values
-1. `y`: tensor of 8/16/32/64-bit integer or floating-point values
+Example:
 
-### Attributes:
+```python
+x = tf.constant([5, 4, 6])
+y = tf.constant([5])
+tf.math.less(x, y) ==> [False, True, False]
+
+x = tf.constant([5, 4, 6])
+y = tf.constant([5, 6, 7])
+tf.math.less(x, y) ==> [False, True, True]
+```
+
+#### Operands:
+1. `x`: tensor of 8/16/32/64-bit integer or TensorFlow uint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or floating-point values
+1. `y`: tensor of 8/16/32/64-bit integer or TensorFlow uint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or floating-point values
+
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `z`: tensor of 1-bit integer values
 
-## tf.Log (TF::LogOp)
+### tf.Log (TF::LogOp)
 Computes natural logarithm of x element-wise.
 
-### Description:
+#### Description:
 
 I.e., \\(y = \log_e x\\).
 
-### Operands:
+Example:
+
+```python
+x = tf.constant([0, 0.5, 1, 5])
+tf.math.log(x) ==> [-inf, -0.6931472,  0. ,  1.609438]
+```
+
+#### Operands:
 1. `x`: tensor of floating-point or 64/128-bit complex type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `y`: tensor of floating-point or 64/128-bit complex type values
 
-## tf.LogSoftmax (TF::LogSoftmaxOp)
+### tf.LogSoftmax (TF::LogSoftmaxOp)
 Computes log softmax activations.
 
-### Description:
+#### Description:
 
 For each batch `i` and class `j` we have
 
     logsoftmax[i, j] = logits[i, j] - log(sum(exp(logits[i])))
 
-### Operands:
+#### Operands:
 1. `logits`: tensor of floating-point values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `logsoftmax`: tensor of floating-point values
 
-## tf.LogicalAnd (TF::LogicalAndOp)
+### tf.LogicalAnd (TF::LogicalAndOp)
 Returns the truth value of x AND y element-wise.
 
-### Description:
+#### Description:
 
 *NOTE*: `LogicalAnd` supports broadcasting. More about broadcasting
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 
-### Operands:
+#### Operands:
 1. `x`: tensor of 1-bit integer values
 1. `y`: tensor of 1-bit integer values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `z`: tensor of 1-bit integer values
 
-## tf.LogicalNot (TF::LogicalNotOp)
+### tf.LogicalNot (TF::LogicalNotOp)
 Returns the truth value of NOT x element-wise.
 
-### Description:
+#### Description:
 
 
-### Operands:
+#### Operands:
 1. `x`: tensor of 1-bit integer values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `y`: tensor of 1-bit integer values
 
-## tf.LogicalOr (TF::LogicalOrOp)
+### tf.LogicalOr (TF::LogicalOrOp)
 Returns the truth value of x OR y element-wise.
 
-### Description:
+#### Description:
 
 *NOTE*: `LogicalOr` supports broadcasting. More about broadcasting
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 
-### Operands:
+#### Operands:
 1. `x`: tensor of 1-bit integer values
 1. `y`: tensor of 1-bit integer values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `z`: tensor of 1-bit integer values
 
-## tf.MatMul (TF::MatMulOp)
+### tf.MatMul (TF::MatMulOp)
 
 Multiply the matrix "a" by the matrix "b".
   
 
-### Description:
+#### Description:
 
 The inputs must be two-dimensional matrices and the inner dimension of
 "a" (after being transposed if transpose_a is true) must match the
@@ -1161,56 +2258,234 @@ true).
 *Note*: The default kernel implementation for MatMul on GPUs uses
 cublas.
 
-### Operands:
-1. `a`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex128 type or complex64 type values
-1. `b`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex128 type or complex64 type values
+#### Operands:
+1. `a`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements values
+1. `b`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `transpose_a` | `BoolAttr` | bool attribute attribute |
 | `transpose_b` | `BoolAttr` | bool attribute attribute |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `product`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex128 type or complex64 type values
+#### Results:
+1. `product`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements values
 
-## tf.Max (TF::MaxOp)
+### tf.MatrixDiag (TF::MatrixDiagOp)
+
+Returns a batched diagonal tensor with given batched diagonal values.
+  
+
+#### Description:
+
+Given a `diagonal`, this operation returns a tensor with the `diagonal` and
+everything else padded with zeros. The diagonal is computed as follows:
+
+Assume `diagonal` has `k` dimensions `[I, J, K, ..., N]`, then the output is a
+tensor of rank `k+1` with dimensions `[I, J, K, ..., N, N]` where:
+
+`output[i, j, k, ..., m, n] = 1{m=n} * diagonal[i, j, k, ..., n]`.
+
+For example:
+
+```
+# 'diagonal' is [[1, 2, 3, 4], [5, 6, 7, 8]]
+
+and diagonal.shape = (2, 4)
+
+tf.matrix_diag(diagonal) ==> [[[1, 0, 0, 0],
+                               [0, 2, 0, 0],
+                               [0, 0, 3, 0],
+                               [0, 0, 0, 4]],
+                              [[5, 0, 0, 0],
+                               [0, 6, 0, 0],
+                               [0, 0, 7, 0],
+                               [0, 0, 0, 8]]]
+
+which has shape (2, 4, 4)
+```
+
+#### Operands:
+1. `diagonal`: tensor of tf.dtype values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of tf.dtype values
+
+### tf.MatrixDiagV2 (TF::MatrixDiagV2Op)
+
+Returns a batched diagonal tensor with given batched diagonal values.
+  
+
+#### Description:
+
+Returns a tensor with the contents in `diagonal` as `k[0]`-th to `k[1]`-th
+diagonals of a matrix, with everything else padded with `padding_value`. `num_rows`
+and `num_cols` specify the dimension of the innermost matrix of the output. If
+both are not specified, the op assumes the innermost matrix is square and infers
+its size from `k` and the innermost dimension of `diagonal`. If only one of them
+is specified, the op assumes the unspecified value is the smallest possible
+based on other criteria.
+
+Let `diagonal` have `r` dimensions `[I, J, ..., L, M, N]`. The output tensor has
+rank `r+1` with shape `[I, J, ..., L, M, num_rows, num_cols]` when only one
+diagonal is given (`k` is an integer or `k[0] == k[1]`). Otherwise, it has rank
+`r` with shape `[I, J, ..., L, num_rows, num_cols]`.
+
+The second innermost dimension of `diagonal` has double meaning.
+When `k` is scalar or `k[0] == k[1]`, `M` is part of the batch size
+`[I, J, ..., M]`, and the output tensor is:
+
+```
+output[i, j, ..., l, m, n]
+  = diagonal[i, j, ..., l, n-max(d_upper, 0)] ; if n - m == d_upper
+    padding_value                             ; otherwise
+```
+
+Otherwise, `M` is treated as the number of diagonals for the matrix in the
+same batch (`M = k[1]-k[0]+1`), and the output tensor is:
+
+```
+output[i, j, ..., l, m, n]
+  = diagonal[i, j, ..., l, k[1]-d, n-max(d, 0)] ; if d_lower <= d <= d_upper
+    padding_value                               ; otherwise
+```
+where `d = n - m`, `d_lower = k[0]`, and `d_upper = k[1]`.
+
+For example:
+
+```
+# The main diagonal.
+diagonal = np.array([[1, 2, 3, 4],            # Input shape: (2, 4)
+                     [5, 6, 7, 8]])
+tf.matrix_diag(diagonal) ==> [[[1, 0, 0, 0],  # Output shape: (2, 4, 4)
+                               [0, 2, 0, 0],
+                               [0, 0, 3, 0],
+                               [0, 0, 0, 4]],
+                              [[5, 0, 0, 0],
+                               [0, 6, 0, 0],
+                               [0, 0, 7, 0],
+                               [0, 0, 0, 8]]]
+
+# A superdiagonal (per batch).
+diagonal = np.array([[1, 2, 3],  # Input shape: (2, 3)
+                     [4, 5, 6]])
+tf.matrix_diag(diagonal, k = 1)
+  ==> [[[0, 1, 0, 0],  # Output shape: (2, 4, 4)
+        [0, 0, 2, 0],
+        [0, 0, 0, 3],
+        [0, 0, 0, 0]],
+       [[0, 4, 0, 0],
+        [0, 0, 5, 0],
+        [0, 0, 0, 6],
+        [0, 0, 0, 0]]]
+
+# A band of diagonals.
+diagonals = np.array([[[1, 2, 3],  # Input shape: (2, 2, 3)
+                       [4, 5, 0]],
+                      [[6, 7, 9],
+                       [9, 1, 0]]])
+tf.matrix_diag(diagonals, k = (-1, 0))
+  ==> [[[1, 0, 0],  # Output shape: (2, 3, 3)
+        [4, 2, 0],
+        [0, 5, 3]],
+       [[6, 0, 0],
+        [9, 7, 0],
+        [0, 1, 9]]]
+
+# Rectangular matrix.
+diagonal = np.array([1, 2])  # Input shape: (2)
+tf.matrix_diag(diagonal, k = -1, num_rows = 3, num_cols = 4)
+  ==> [[0, 0, 0, 0],  # Output shape: (3, 4)
+       [1, 0, 0, 0],
+       [0, 2, 0, 0]]
+
+# Rectangular matrix with inferred num_cols and padding_value = 9.
+tf.matrix_diag(diagonal, k = -1, num_rows = 3, padding_value = 9)
+  ==> [[9, 9],  # Output shape: (3, 2)
+       [1, 9],
+       [9, 2]]
+```
+
+#### Operands:
+1. `diagonal`: tensor of tf.dtype values
+1. `k`: tensor of 32-bit integer values
+1. `num_rows`: tensor of 32-bit integer values
+1. `num_cols`: tensor of 32-bit integer values
+1. `padding_value`: tensor of tf.dtype values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of tf.dtype values
+
+### tf.Max (TF::MaxOp)
 
 Computes the maximum of elements across dimensions of a tensor.
   
 
-### Description:
+#### Description:
 
 Reduces `input` along the dimensions given in `axis`. Unless
 `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
 `axis`. If `keep_dims` is true, the reduced dimensions are
 retained with length 1.
 
-### Operands:
-1. `input`: tensor of number values
+#### Operands:
+1. `input`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow qint32 type or TensorFlow qint8 type or TensorFlow quint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or TensorFlow uint8 type values
 1. `reduction_indices`: tensor of 32/64-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `keep_dims` | `BoolAttr` | bool attribute attribute |
 | `T` | `Attribute` | derived attribute attribute |
 | `Tidx` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `output`: tensor of number values
+#### Results:
+1. `output`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow qint32 type or TensorFlow qint8 type or TensorFlow quint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or TensorFlow uint8 type values
 
-## tf.MaxPool (TF::MaxPoolOp)
+### tf.MaxPoolGrad (TF::MaxPoolGradOp)
+Computes gradients of the maxpooling function.
+
+#### Description:
+
+
+#### Operands:
+1. `orig_input`: tensor of 8/16/32/64-bit integer or TensorFlow uint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or floating-point values
+1. `orig_output`: tensor of 8/16/32/64-bit integer or TensorFlow uint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or floating-point values
+1. `grad`: tensor of 8/16/32/64-bit integer or TensorFlow uint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or floating-point values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `ksize` | `ArrayAttr` | 64-bit integer array attribute with at least 4 elements attribute |
+| `strides` | `ArrayAttr` | 64-bit integer array attribute with at least 4 elements attribute |
+| `padding` | `StringAttr` | string attribute whose value is SAME, or VALID attribute |
+| `data_format` | `StringAttr` | 'NHWC' or 'NCHW' convnet data format attribute |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of 8/16/32/64-bit integer or TensorFlow uint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or floating-point values
+
+### tf.MaxPool (TF::MaxPoolOp)
 Performs max pooling on the input.
 
-### Description:
+#### Description:
 
 
-### Operands:
-1. `input`: tensor of 8/16/32/64-bit integer or floating-point values
+#### Operands:
+1. `input`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or TensorFlow qint8 type or TensorFlow uint16 type or TensorFlow uint8 type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `ksize` | `ArrayAttr` | 64-bit integer array attribute with at least 4 elements attribute |
@@ -1219,197 +2494,494 @@ Performs max pooling on the input.
 | `data_format` | `StringAttr` | string attribute whose value is NHWC, or NCHW, or NCHW_VECT_C attribute |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `output`: tensor of 8/16/32/64-bit integer or floating-point values
+#### Results:
+1. `output`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or TensorFlow qint8 type or TensorFlow uint16 type or TensorFlow uint8 type values
 
-## tf.Maximum (TF::MaximumOp)
+### tf.Maximum (TF::MaximumOp)
 Returns the max of x and y (i.e. x > y ? x : y) element-wise.
 
-### Description:
+#### Description:
 
 *NOTE*: `Maximum` supports broadcasting. More about broadcasting
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 
-### Operands:
+#### Operands:
 1. `x`: tensor of floating-point or 32/64-bit integer values
 1. `y`: tensor of floating-point or 32/64-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `z`: tensor of floating-point or 32/64-bit integer values
 
-## tf.Mean (TF::MeanOp)
+### tf.Mean (TF::MeanOp)
 Computes the mean of elements across dimensions of a tensor.
 
-### Description:
+#### Description:
 
 Reduces `input` along the dimensions given in `axis`. Unless
 `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
 `axis`. If `keep_dims` is true, the reduced dimensions are
 retained with length 1.
 
-### Operands:
+#### Operands:
 1. `input`: tensor of number values
 1. `reduction_indices`: tensor of 32/64-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `keep_dims` | `BoolAttr` | bool attribute attribute |
 | `T` | `Attribute` | derived attribute attribute |
 | `Tidx` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of number values
 
-## tf.Min (TF::MinOp)
+### tf.Min (TF::MinOp)
 
 Computes the minimum of elements across dimensions of a tensor.
   
 
-### Description:
+#### Description:
 
 Reduces `input` along the dimensions given in `axis`. Unless
 `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
 `axis`. If `keep_dims` is true, the reduced dimensions are
 retained with length 1.
 
-### Operands:
-1. `input`: tensor of number values
+#### Operands:
+1. `input`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow qint32 type or TensorFlow qint8 type or TensorFlow quint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or TensorFlow uint8 type values
 1. `reduction_indices`: tensor of 32/64-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `keep_dims` | `BoolAttr` | bool attribute attribute |
 | `T` | `Attribute` | derived attribute attribute |
 | `Tidx` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `output`: tensor of number values
+#### Results:
+1. `output`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow qint32 type or TensorFlow qint8 type or TensorFlow quint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or TensorFlow uint8 type values
 
-## tf.Minimum (TF::MinimumOp)
+### tf.Minimum (TF::MinimumOp)
 Returns the min of x and y (i.e. x < y ? x : y) element-wise.
 
-### Description:
+#### Description:
 
 *NOTE*: `Minimum` supports broadcasting. More about broadcasting
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 
-### Operands:
+#### Operands:
 1. `x`: tensor of floating-point or 32/64-bit integer values
 1. `y`: tensor of floating-point or 32/64-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `z`: tensor of floating-point or 32/64-bit integer values
 
-## tf.MulNoNan (TF::MulNoNanOp)
+### tf.MirrorPad (TF::MirrorPadOp)
+Pads a tensor with mirrored values.
+
+#### Description:
+
+This operation pads `input` with mirrored values according to the `paddings`
+you specify. `paddings` is an integer tensor with shape `[n, 2]`, where n is
+the rank of `input`. For each dimension D of `input`, `paddings[D, 0]` indicates
+how many values to add before the contents of `input` in that dimension, and
+`paddings[D, 1]` indicates how many values to add after the contents of `input`
+in that dimension. Both `paddings[D, 0]` and `paddings[D, 1]` must be no greater
+than `input.dim_size(D)` if `copy_border` is true, or no greater than
+`input.dim_size(D) - 1` if `copy_border` is false.
+
+The padded size of each dimension D of the output is:
+
+`paddings(D, 0) + input.dim_size(D) + paddings(D, 1)`
+
+For example:
+
+```
+# 't' is [[1, 2, 3], [4, 5, 6]].
+# 'paddings' is [[1, 1], [2, 2]].
+# 'mode' is SYMMETRIC.
+# rank of 't' is 2.
+pad(t, paddings) ==> [[2, 1, 1, 2, 3, 3, 2]
+                      [2, 1, 1, 2, 3, 3, 2]
+                      [5, 4, 4, 5, 6, 6, 5]
+                      [5, 4, 4, 5, 6, 6, 5]]
+```
+
+#### Operands:
+1. `input`: tensor of tf.dtype values
+1. `paddings`: tensor of 32/64-bit integer values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `mode` | `StringAttr` | string attribute whose value is REFLECT, or SYMMETRIC attribute |
+| `T` | `Attribute` | derived attribute attribute |
+| `Tpaddings` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of tf.dtype values
+
+### tf.MlirPassthroughOp (TF::MlirPassthroughOp)
+
+Wraps an arbitrary MLIR computation expressed as a module with a main() function.
+  
+
+#### Description:
+
+This operation does not have an associated kernel and is not intended to be
+executed in a regular TensorFlow session. Instead it is intended to be used for
+testing or for special cases where a user intends to pass custom MLIR computation
+through a TensorFlow graph with the intent of having custom tooling process
+it downstream (when targeting a different environment, like TensorFlow Lite for
+example).
+The MLIR module is expected to have a main() function that will be used as an
+entry point. The inputs to the operation will be passed as arguments to the
+main() function and the returned values of the main function are mapped to the
+outputs.
+Example usage:
+
+```
+import tensorflow as tf
+from tensorflow.compiler.mlir.tensorflow.gen_mlir_passthrough_op import mlir_passthrough_op
+
+mlir_module = '''
+func @main(%arg0 : tensor<10xf32>, %arg1 : tensor<10xf32>) -> tensor<10x10xf32> {
+   %add = "magic.op"(%arg0, %arg1) : (tensor<10xf32>, tensor<10xf32>) -> tensor<10x10xf32>
+   return %add : tensor<10x10xf32>
+}
+'''
+
+@tf.function
+def foo(x, y):
+  return mlir_passthrough_op([x, y], mlir_module, Toutputs=[tf.float32])
+
+graph_def = foo.get_concrete_function(tf.TensorSpec([10], tf.float32), tf.TensorSpec([10], tf.float32)).graph.as_graph_def()
+```
+
+#### Operands:
+1. `inputs`: tensor of tf.dtype values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `mlir_module` | `StringAttr` | string attribute attribute |
+| `Tinputs` | `Attribute` | derived attribute attribute |
+| `Toutputs` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `outputs`: tensor of tf.dtype values
+
+### tf.MulNoNan (TF::MulNoNanOp)
 
 Returns x * y element-wise. Returns zero if y is zero, even if x if infinite or NaN.
   
 
-### Description:
+#### Description:
 
 *NOTE*: `MulNoNan` supports broadcasting. More about broadcasting
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 
-### Operands:
-1. `x`: tensor of 16-bit float or 32-bit float or 64-bit float or complex128 type or complex64 type values
-1. `y`: tensor of 16-bit float or 32-bit float or 64-bit float or complex128 type or complex64 type values
+#### Operands:
+1. `x`: tensor of 16-bit float or 32-bit float or 64-bit float or complex type with 64-bit float elements or complex type with 32-bit float elements values
+1. `y`: tensor of 16-bit float or 32-bit float or 64-bit float or complex type with 64-bit float elements or complex type with 32-bit float elements values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `z`: tensor of 16-bit float or 32-bit float or 64-bit float or complex128 type or complex64 type values
+#### Results:
+1. `z`: tensor of 16-bit float or 32-bit float or 64-bit float or complex type with 64-bit float elements or complex type with 32-bit float elements values
 
-## tf.Mul (TF::MulOp)
+### tf.Mul (TF::MulOp)
 Returns x * y element-wise.
 
-### Description:
+#### Description:
 
 *NOTE*: `Multiply` supports broadcasting. More about broadcasting
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 
-### Operands:
-1. `x`: tensor of number values
-1. `y`: tensor of number values
+#### Operands:
+1. `x`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow uint16 type or TensorFlow uint8 type values
+1. `y`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow uint16 type or TensorFlow uint8 type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `z`: tensor of number values
+#### Results:
+1. `z`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow uint16 type or TensorFlow uint8 type values
 
-## tf.Neg (TF::NegOp)
+### tf.Neg (TF::NegOp)
 Computes numerical negative value element-wise.
 
-### Description:
+#### Description:
 
 I.e., \\(y = -x\\).
 
-### Operands:
-1. `x`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex128 type or complex64 type values
+#### Operands:
+1. `x`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `y`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex128 type or complex64 type values
+#### Results:
+1. `y`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements values
 
-## tf.NoOp (TF::NoOp)
+### tf.NoOp (TF::NoOp)
 Does nothing. Only useful as a placeholder for control edges.
 
-### Description:
+#### Description:
 
 
-### Operands:
+#### Operands:
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 
-## tf.NotEqual (TF::NotEqualOp)
+### tf.NonMaxSuppressionV4 (TF::NonMaxSuppressionV4Op)
+
+Greedily selects a subset of bounding boxes in descending order of score,
+  
+
+#### Description:
+
+pruning away boxes that have high intersection-over-union (IOU) overlap
+with previously selected boxes.  Bounding boxes with score less than
+`score_threshold` are removed.  Bounding boxes are supplied as
+[y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
+diagonal pair of box corners and the coordinates can be provided as normalized
+(i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm
+is agnostic to where the origin is in the coordinate system and more
+generally is invariant to orthogonal transformations and translations
+of the coordinate system; thus translations or reflections of the coordinate
+system result in the same boxes being selected by the algorithm.
+The output of this operation is a set of integers indexing into the input
+collection of bounding boxes representing the selected boxes.  The bounding
+box coordinates corresponding to the selected indices can then be obtained
+using the `tf.gather` operation.  For example:
+  selected_indices = tf.image.non_max_suppression_v2(
+      boxes, scores, max_output_size, iou_threshold, score_threshold)
+  selected_boxes = tf.gather(boxes, selected_indices)
+
+#### Operands:
+1. `boxes`: tensor of 16-bit float or 32-bit float values
+1. `scores`: tensor of 16-bit float or 32-bit float values
+1. `max_output_size`: tensor of 32-bit integer values
+1. `iou_threshold`: tensor of 16-bit float or 32-bit float values
+1. `score_threshold`: tensor of 16-bit float or 32-bit float values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `pad_to_max_output_size` | `BoolAttr` | bool attribute attribute |
+| `T_threshold` | `Attribute` | derived attribute attribute |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `selected_indices`: tensor of 32-bit integer values
+1. `valid_outputs`: tensor of 32-bit integer values
+
+### tf.NonMaxSuppressionV5 (TF::NonMaxSuppressionV5Op)
+
+Greedily selects a subset of bounding boxes in descending order of score,
+  
+
+#### Description:
+
+pruning away boxes that have high intersection-over-union (IOU) overlap
+with previously selected boxes.  Bounding boxes with score less than
+`score_threshold` are removed.  Bounding boxes are supplied as
+[y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
+diagonal pair of box corners and the coordinates can be provided as normalized
+(i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm
+is agnostic to where the origin is in the coordinate system and more
+generally is invariant to orthogonal transformations and translations
+of the coordinate system; thus translations or reflections of the coordinate
+system result in the same boxes being selected by the algorithm.
+The output of this operation is a set of integers indexing into the input
+collection of bounding boxes representing the selected boxes.  The bounding
+box coordinates corresponding to the selected indices can then be obtained
+using the `tf.gather` operation.  For example:
+  selected_indices = tf.image.non_max_suppression_v2(
+      boxes, scores, max_output_size, iou_threshold, score_threshold)
+  selected_boxes = tf.gather(boxes, selected_indices)
+This op also supports a Soft-NMS (with Gaussian weighting) mode (c.f.
+Bodla et al, https://arxiv.org/abs/1704.04503) where boxes reduce the score
+of other overlapping boxes instead of directly causing them to be pruned.
+To enable this Soft-NMS mode, set the `soft_nms_sigma` parameter to be
+larger than 0.
+
+#### Operands:
+1. `boxes`: tensor of 16-bit float or 32-bit float values
+1. `scores`: tensor of 16-bit float or 32-bit float values
+1. `max_output_size`: tensor of 32-bit integer values
+1. `iou_threshold`: tensor of 16-bit float or 32-bit float values
+1. `score_threshold`: tensor of 16-bit float or 32-bit float values
+1. `soft_nms_sigma`: tensor of 16-bit float or 32-bit float values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `pad_to_max_output_size` | `BoolAttr` | bool attribute attribute |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `selected_indices`: tensor of 32-bit integer values
+1. `selected_scores`: tensor of 16-bit float or 32-bit float values
+1. `valid_outputs`: tensor of 32-bit integer values
+
+### tf.NotEqual (TF::NotEqualOp)
 Returns the truth value of (x != y) element-wise.
 
-### Description:
+#### Description:
 
 *NOTE*: `NotEqual` supports broadcasting. More about broadcasting
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 
-### Operands:
-1. `x`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 1-bit integer or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex128 type or complex64 type or TensorFlow string type values
-1. `y`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 1-bit integer or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex128 type or complex64 type or TensorFlow string type values
+#### Operands:
+1. `x`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 1-bit integer or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow qint32 type or TensorFlow qint8 type or TensorFlow quint8 type or TensorFlow string type or TensorFlow uint8 type values
+1. `y`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 1-bit integer or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow qint32 type or TensorFlow qint8 type or TensorFlow quint8 type or TensorFlow string type or TensorFlow uint8 type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
+| `incompatible_shape_error` | `BoolAttr` | bool attribute attribute |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `z`: tensor of 1-bit integer values
 
-## tf.Pack (TF::PackOp)
+### tf.OneHot (TF::OneHotOp)
+Returns a one-hot tensor.
+
+#### Description:
+
+The locations represented by indices in `indices` take value `on_value`,
+while all other locations take value `off_value`.
+
+If the input `indices` is rank `N`, the output will have rank `N+1`.
+The new axis is created at dimension `axis` (default: the new axis is
+appended at the end).
+
+If `indices` is a scalar the output shape will be a vector of length `depth`.
+
+If `indices` is a vector of length `features`, the output shape will be:
+```
+  features x depth if axis == -1
+  depth x features if axis == 0
+```
+
+If `indices` is a matrix (batch) with shape `[batch, features]`,
+the output shape will be:
+```
+  batch x features x depth if axis == -1
+  batch x depth x features if axis == 1
+  depth x batch x features if axis == 0
+```
+
+
+Examples
+=========
+
+Suppose that
+```
+  indices = [0, 2, -1, 1]
+  depth = 3
+  on_value = 5.0
+  off_value = 0.0
+  axis = -1
+```
+
+Then output is `[4 x 3]`:
+```
+output =
+  [5.0 0.0 0.0]  // one_hot(0)
+  [0.0 0.0 5.0]  // one_hot(2)
+  [0.0 0.0 0.0]  // one_hot(-1)
+  [0.0 5.0 0.0]  // one_hot(1)
+```
+
+Suppose that
+```
+  indices = [0, 2, -1, 1]
+  depth = 3
+  on_value = 0.0
+  off_value = 3.0
+  axis = 0
+```
+
+Then output is `[3 x 4]`:
+```
+output =
+  [0.0 3.0 3.0 3.0]
+  [3.0 3.0 3.0 0.0]
+  [3.0 3.0 3.0 3.0]
+  [3.0 0.0 3.0 3.0]
+//  ^                one_hot(0)
+//      ^            one_hot(2)
+//          ^        one_hot(-1)
+//              ^    one_hot(1)
+```
+
+Suppose that
+```
+  indices = [[0, 2], [1, -1]]
+  depth = 3
+  on_value = 1.0
+  off_value = 0.0
+  axis = -1
+```
+
+Then output is `[2 x 2 x 3]`:
+```
+output =
+  [
+    [1.0, 0.0, 0.0]  // one_hot(0)
+    [0.0, 0.0, 1.0]  // one_hot(2)
+  ][
+    [0.0, 1.0, 0.0]  // one_hot(1)
+    [0.0, 0.0, 0.0]  // one_hot(-1)
+  ]
+```
+
+#### Operands:
+1. `indices`: tensor of 32-bit integer or 64-bit integer or TensorFlow uint8 type values
+1. `depth`: tensor of 32-bit integer values
+1. `on_value`: tensor of tf.dtype values
+1. `off_value`: tensor of tf.dtype values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `axis` | `IntegerAttr` | 64-bit integer attribute attribute |
+| `T` | `Attribute` | derived attribute attribute |
+| `TI` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of tf.dtype values
+
+### tf.Pack (TF::PackOp)
 
 Packs a list of `N` rank-`R` tensors into one rank-`(R+1)` tensor.
   
 
-### Description:
+#### Description:
 
 Packs the `N` tensors in `values` into a tensor with rank one higher than each
 tensor in `values`, by packing them along the `axis` dimension.
@@ -1431,23 +3003,23 @@ pack([x, y, z], axis=1) => [[1, 2, 3], [4, 5, 6]]
 
 This is the opposite of `unpack`.
 
-### Operands:
+#### Operands:
 1. `values`: tensor of tf.dtype values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `N` | `IntegerAttr` | 64-bit integer attribute whose minimal value is 1 attribute |
 | `axis` | `IntegerAttr` | 64-bit integer attribute attribute |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of tf.dtype values
 
-## tf.Pad (TF::PadOp)
+### tf.Pad (TF::PadOp)
 Pads a tensor with zeros.
 
-### Description:
+#### Description:
 
 This operation pads a `input` with zeros according to the `paddings` you
 specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is the
@@ -1472,23 +3044,23 @@ pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0]
                       [0, 0, 0, 0, 0, 0]]
 ```
 
-### Operands:
+#### Operands:
 1. `input`: tensor of tf.dtype values
 1. `paddings`: tensor of 32/64-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 | `Tpaddings` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of tf.dtype values
 
-## tf.PadV2 (TF::PadV2Op)
+### tf.PadV2 (TF::PadV2Op)
 Pads a tensor.
 
-### Description:
+#### Description:
 
 This operation pads `input` according to the `paddings` and `constant_values`
 you specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is
@@ -1515,31 +3087,56 @@ pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0]
                       [0, 0, 0, 0, 0, 0]]
 ```
 
-### Operands:
+#### Operands:
 1. `input`: tensor of tf.dtype values
 1. `paddings`: tensor of 32/64-bit integer values
 1. `constant_values`: tensor of tf.dtype values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 | `Tpaddings` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of tf.dtype values
 
-## tf.Placeholder.input (TF::PlaceholderInputOp)
+### tf.PartitionedCall (TF::PartitionedCallOp)
+Returns `f(inputs)`, where `f`'s body is placed and partitioned.
+
+#### Description:
+
+Asynchronously executes a function, potentially across multiple devices but
+within a single process. The kernel places and partitions a given function's
+underlying graph, and executes each of the partitioned subgraphs as a function.
+
+#### Operands:
+1. `args`: tensor of tf.dtype values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `f` | `SymbolRefAttr` | symbol reference attribute attribute |
+| `config` | `StringAttr` | string attribute attribute |
+| `config_proto` | `StringAttr` | string attribute attribute |
+| `executor_type` | `StringAttr` | string attribute attribute |
+| `Tin` | `Attribute` | derived attribute attribute |
+| `Tout` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of tf.dtype values
+
+### tf.Placeholder.input (TF::PlaceholderInputOp)
 PlaceholderInput op
 
-### Description:
+#### Description:
 
 Inserts a placeholder for a tensor that will be always fed.
 
-### Operands:
+#### Operands:
 1. `arg`: tensor of tf.dtype values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `min` | `FloatAttr` | 32-bit float attribute attribute |
@@ -1547,36 +3144,107 @@ Inserts a placeholder for a tensor that will be always fed.
 | `type` | `TypeAttr` | integer type attribute |
 | `dtype` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of tf.dtype values
 
-## tf.Placeholder (TF::PlaceholderOp)
+### tf.Placeholder (TF::PlaceholderOp)
 Placeholder op
 
-### Description:
+#### Description:
 
 Inserts a placeholder for a tensor that will be always fed.
 
-### Operands:
+#### Operands:
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `dtype` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of tf.dtype values
 
-## tf.QuantizeAndDequantize (TF::QuantizeAndDequantizeOp)
+### tf.PlaceholderWithDefault (TF::PlaceholderWithDefaultOp)
+Placeholder op
+
+#### Description:
+
+A placeholder op that passes through input when its output is not fed.
+
+#### Operands:
+1. `input`: tensor of tf.dtype values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `dtype` | `Attribute` | derived attribute attribute |
+| `shape` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of tf.dtype values
+
+### tf.Pow (TF::PowOp)
+Computes the power of one value to another.
+
+#### Description:
+
+Given a tensor `x` and a tensor `y`, this operation computes \\(x^y\\) for
+corresponding elements in `x` and `y`. For example:
+
+```
+# tensor 'x' is [[2, 2], [3, 3]]
+# tensor 'y' is [[8, 16], [2, 3]]
+tf.pow(x, y) ==> [[256, 65536], [9, 27]]
+```
+
+#### Operands:
+1. `x`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements values
+1. `y`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `z`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements values
+
+### tf.Prod (TF::ProdOp)
+
+Computes the product of elements across dimensions of a tensor.
+  
+
+#### Description:
+
+Reduces `input` along the dimensions given in `axis`. Unless
+`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
+`axis`. If `keep_dims` is true, the reduced dimensions are
+retained with length 1.
+
+#### Operands:
+1. `input`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow qint32 type or TensorFlow qint8 type or TensorFlow quint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or TensorFlow uint8 type values
+1. `reduction_indices`: tensor of 32/64-bit integer values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `keep_dims` | `BoolAttr` | bool attribute attribute |
+| `T` | `Attribute` | derived attribute attribute |
+| `Tidx` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow qint32 type or TensorFlow qint8 type or TensorFlow quint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or TensorFlow uint8 type values
+
+### tf.QuantizeAndDequantize (TF::QuantizeAndDequantizeOp)
 Use QuantizeAndDequantizeV2 instead.
 
-### Description:
+#### Description:
 
 
-### Operands:
+#### Operands:
 1. `input`: tensor of floating-point values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `signed_input` | `BoolAttr` | bool attribute attribute |
@@ -1586,13 +3254,13 @@ Use QuantizeAndDequantizeV2 instead.
 | `input_max` | `FloatAttr` | 32-bit float attribute attribute |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of floating-point values
 
-## tf.QuantizeAndDequantizeV2 (TF::QuantizeAndDequantizeV2Op)
+### tf.QuantizeAndDequantizeV2 (TF::QuantizeAndDequantizeV2Op)
 Quantizes then dequantizes a tensor.
 
-### Description:
+#### Description:
 
 This op simulates the precision loss from the quantized forward pass by:
 
@@ -1646,12 +3314,12 @@ output = round(clamp(value, input_min, input_max) * scale_factor) / scale_factor
 
 The above round function rounds the value based on the given round_mode.
 
-### Operands:
+#### Operands:
 1. `input`: tensor of floating-point values
 1. `input_min`: tensor of floating-point values
 1. `input_max`: tensor of floating-point values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `signed_input` | `BoolAttr` | bool attribute attribute |
@@ -1659,48 +3327,79 @@ The above round function rounds the value based on the given round_mode.
 | `range_given` | `BoolAttr` | bool attribute attribute |
 | `round_mode` | `StringAttr` | string attribute whose value is HALF_TO_EVEN, or HALF_UP attribute |
 | `narrow_range` | `BoolAttr` | bool attribute attribute |
+| `axis` | `IntegerAttr` | 64-bit integer attribute attribute |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of floating-point values
 
-## tf.QuantizeAndDequantizeV3 (TF::QuantizeAndDequantizeV3Op)
+### tf.QuantizeAndDequantizeV3 (TF::QuantizeAndDequantizeV3Op)
 Quantizes then dequantizes a tensor.
 
-### Description:
+#### Description:
 
 This is almost identical to QuantizeAndDequantizeV2, except that num_bits is a
 tensor, so its value can change during training.
 
-### Operands:
+#### Operands:
 1. `input`: tensor of floating-point values
 1. `input_min`: tensor of floating-point values
 1. `input_max`: tensor of floating-point values
 1. `num_bits`: tensor of 32-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `signed_input` | `BoolAttr` | bool attribute attribute |
 | `range_given` | `BoolAttr` | bool attribute attribute |
 | `narrow_range` | `BoolAttr` | bool attribute attribute |
+| `axis` | `IntegerAttr` | 64-bit integer attribute attribute |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of floating-point values
 
-## tf.RandomUniform (TF::RandomUniformOp)
+### tf.RFFT (TF::RFFTOp)
+Real-valued fast Fourier transform.
+
+#### Description:
+
+Computes the 1-dimensional discrete Fourier transform of a real-valued signal
+over the inner-most dimension of `input`.
+
+Since the DFT of a real signal is Hermitian-symmetric, `RFFT` only returns the
+`fft_length / 2 + 1` unique components of the FFT: the zero-frequency term,
+followed by the `fft_length / 2` positive-frequency terms.
+
+Along the axis `RFFT` is computed on, if `fft_length` is smaller than the
+corresponding dimension of `input`, the dimension is cropped. If it is larger,
+the dimension is padded with zeros.
+
+#### Operands:
+1. `input`: tensor of 32/64-bit float values
+1. `fft_length`: tensor of 32-bit integer values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `Treal` | `Attribute` | derived attribute attribute |
+| `Tcomplex` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of complex type with 64-bit float elements or complex type with 32-bit float elements values
+
+### tf.RandomUniform (TF::RandomUniformOp)
 Outputs random values from a uniform distribution.
 
-### Description:
+#### Description:
 
 The generated values follow a uniform distribution in the range `[0, 1)`. The
 lower bound 0 is included in the range, while the upper bound 1 is excluded.
 
-### Operands:
+#### Operands:
 1. `shape`: tensor of 32/64-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `seed` | `IntegerAttr` | 64-bit integer attribute attribute |
@@ -1708,13 +3407,13 @@ lower bound 0 is included in the range, while the upper bound 1 is excluded.
 | `T` | `Attribute` | derived attribute attribute |
 | `dtype` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of floating-point values
 
-## tf.Range (TF::RangeOp)
+### tf.Range (TF::RangeOp)
 Creates a sequence of numbers.
 
-### Description:
+#### Description:
 
 This operation creates a sequence of numbers that begins at `start` and
 extends by increments of `delta` up to but not including `limit`.
@@ -1728,23 +3427,23 @@ For example:
 tf.range(start, limit, delta) ==> [3, 6, 9, 12, 15]
 ```
 
-### Operands:
-1. `start`: tensor of bfloat16 type or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer values
-1. `limit`: tensor of bfloat16 type or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer values
-1. `delta`: tensor of bfloat16 type or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer values
+#### Operands:
+1. `start`: tensor of floating-point or 32/64-bit integer values
+1. `limit`: tensor of floating-point or 32/64-bit integer values
+1. `delta`: tensor of floating-point or 32/64-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `Tidx` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `output`: tensor of bfloat16 type or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer values
+#### Results:
+1. `output`: tensor of floating-point or 32/64-bit integer values
 
-## tf.Rank (TF::RankOp)
+### tf.Rank (TF::RankOp)
 Returns the rank of a tensor.
 
-### Description:
+#### Description:
 
 This operation returns an integer representing the rank of `input`.
 
@@ -1760,107 +3459,184 @@ rank(t) ==> 3
 of a tensor is the number of indices required to uniquely select each element
 of the tensor. Rank is also known as "order", "degree", or "ndims."
 
-### Operands:
+#### Operands:
 1. `input`: tensor of tf.dtype values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of 32-bit integer values
 
-## tf.RealDiv (TF::RealDivOp)
+### tf.ReadVariableOp (TF::ReadVariableOp)
+Reads the value of a variable.
+
+#### Description:
+
+The tensor returned by this operation is immutable.
+
+The value returned by this operation is guaranteed to be influenced by all the
+writes on which this operation depends directly or indirectly, and to not be
+influenced by any of the writes which depend directly or indirectly on this
+operation.
+
+#### Operands:
+1. `resource`: tensor of TensorFlow resource type values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `dtype` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `value`: tensor of tf.dtype values
+
+### tf.RealDiv (TF::RealDivOp)
 Returns x / y element-wise for real types.
 
-### Description:
+#### Description:
 
 If `x` and `y` are reals, this will return the floating-point division.
 
 *NOTE*: `Div` supports broadcasting. More about broadcasting
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 
-### Operands:
-1. `x`: tensor of number values
-1. `y`: tensor of number values
+#### Operands:
+1. `x`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow uint16 type or TensorFlow uint8 type values
+1. `y`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow uint16 type or TensorFlow uint8 type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `z`: tensor of number values
+#### Results:
+1. `z`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow uint16 type or TensorFlow uint8 type values
 
-## tf.Reciprocal (TF::ReciprocalOp)
+### tf.Real (TF::RealOp)
+Returns the real part of a complex number.
+
+#### Description:
+
+Given a tensor `input` of complex numbers, this operation returns a tensor of
+type `float` that is the real part of each element in `input`. All elements in
+`input` must be complex numbers of the form \\(a + bj\\), where *a* is the real
+ part returned by this operation and *b* is the imaginary part.
+
+For example:
+
+```
+# tensor 'input' is [-2.25 + 4.75j, 3.25 + 5.75j]
+tf.real(input) ==> [-2.25, 3.25]
+```
+
+#### Operands:
+1. `input`: tensor of complex type with 64-bit float elements or complex type with 32-bit float elements values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `T` | `Attribute` | derived attribute attribute |
+| `Tout` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of 32/64-bit float values
+
+### tf.Reciprocal (TF::ReciprocalOp)
 Computes the reciprocal of x element-wise.
 
-### Description:
+#### Description:
 
 I.e., \\(y = 1 / x\\).
 
-### Operands:
-1. `x`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex128 type or complex64 type values
+#### Operands:
+1. `x`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `y`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex128 type or complex64 type values
+#### Results:
+1. `y`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements values
 
-## tf.Relu6 (TF::Relu6Op)
+### tf.Relu6 (TF::Relu6Op)
 Computes rectified linear 6: `min(max(features, 0), 6)`.
 
-### Description:
+#### Description:
 
 
-### Operands:
-1. `features`: tensor of 8/16/32/64-bit integer or floating-point values
+#### Operands:
+1. `features`: tensor of 8/16/32/64-bit integer or TensorFlow uint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or floating-point values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `activations`: tensor of 8/16/32/64-bit integer or floating-point values
+#### Results:
+1. `activations`: tensor of 8/16/32/64-bit integer or TensorFlow uint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or floating-point values
 
-## tf.Relu (TF::ReluOp)
+### tf.ReluGrad (TF::ReluGradOp)
+Computes rectified linear gradients for a Relu operation.
+
+#### Description:
+
+
+#### Operands:
+1. `gradients`: tensor of 8/16/32/64-bit integer or TensorFlow uint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or floating-point values
+1. `features`: tensor of 8/16/32/64-bit integer or TensorFlow uint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or floating-point values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `backprops`: tensor of 8/16/32/64-bit integer or TensorFlow uint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or floating-point values
+
+### tf.Relu (TF::ReluOp)
 Computes rectified linear: `max(features, 0)`.
 
-### Description:
+#### Description:
 
+See: https://en.wikipedia.org/wiki/Rectifier_(neural_networks)
+Example usage:
+>>> tf.nn.relu([-2., 0., -0., 3.]).numpy()
+array([ 0.,  0., -0.,  3.], dtype=float32)
 
-### Operands:
-1. `features`: tensor of 8/16/32/64-bit integer or floating-point values
+#### Operands:
+1. `features`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or TensorFlow qint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or TensorFlow uint8 type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `activations`: tensor of 8/16/32/64-bit integer or floating-point values
+#### Results:
+1. `activations`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or TensorFlow qint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or TensorFlow uint8 type values
 
-## tf.Reshape (TF::ReshapeOp)
+### tf.Reshape (TF::ReshapeOp)
 Reshapes a tensor.
 
-### Description:
+#### Description:
 
 Given `tensor`, this operation returns a tensor that has the same values
 as `tensor` with shape `shape`.
 
-If one component of `shape` is the special value -1, the size of that dimension
-is computed so that the total size remains constant.  In particular, a `shape`
-of `[-1]` flattens into 1-D.  At most one component of `shape` can be -1.
+If one component of 1-D tensor `shape` is the special value -1, the size of that
+dimension is computed so that the total size remains constant.  In particular, a
+`shape` of `[-1]` flattens into 1-D.  At most one component of `shape` may be
+unknown.
 
-If `shape` is 1-D or higher, then the operation returns a tensor with shape
+The `shape` must be 1-D and the operation returns a tensor with shape
 `shape` filled with the values of `tensor`. In this case, the number of elements
 implied by `shape` must be the same as the number of elements in `tensor`.
 
+It is an error if `shape` is not 1-D.
+
 For example:
 
 ```
@@ -1907,44 +3683,141 @@ reshape(t, [ 2, -1, 3]) ==> [[[1, 1, 1],
 reshape(t, []) ==> 7
 ```
 
-### Operands:
+#### Operands:
 1. `tensor`: tensor of tf.dtype values
 1. `shape`: tensor of 32/64-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 | `Tshape` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of tf.dtype values
 
-## tf.ResizeBilinear (TF::ResizeBilinearOp)
+### tf.ResizeBilinear (TF::ResizeBilinearOp)
 Resize `images` to `size` using bilinear interpolation.
 
-### Description:
+#### Description:
 
 Input images can be of different types but output images are always float.
 
-### Operands:
-1. `images`: tensor of 8/16/32/64-bit integer or floating-point values
+#### Operands:
+1. `images`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or TensorFlow uint16 type or TensorFlow uint8 type values
 1. `size`: tensor of 32-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `align_corners` | `BoolAttr` | bool attribute attribute |
 | `half_pixel_centers` | `BoolAttr` | bool attribute attribute |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `resized_images`: tensor of 32-bit float values
 
-## tf.ReverseV2 (TF::ReverseV2Op)
+### tf.ResizeNearestNeighbor (TF::ResizeNearestNeighborOp)
+
+Resize `images` to `size` using nearest neighbor interpolation.
+  
+
+#### Description:
+
+
+#### Operands:
+1. `images`: tensor of 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or TensorFlow uint16 type or TensorFlow uint8 type values
+1. `size`: tensor of 32-bit integer values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `align_corners` | `BoolAttr` | bool attribute attribute |
+| `half_pixel_centers` | `BoolAttr` | bool attribute attribute |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `resized_images`: tensor of 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or TensorFlow uint16 type or TensorFlow uint8 type values
+
+### tf.ReverseSequence (TF::ReverseSequenceOp)
+Reverses variable length slices.
+
+#### Description:
+
+This op first slices `input` along the dimension `batch_dim`, and for each
+slice `i`, reverses the first `seq_lengths[i]` elements along
+the dimension `seq_dim`.
+
+The elements of `seq_lengths` must obey `seq_lengths[i] <= input.dims[seq_dim]`,
+and `seq_lengths` must be a vector of length `input.dims[batch_dim]`.
+
+The output slice `i` along dimension `batch_dim` is then given by input
+slice `i`, with the first `seq_lengths[i]` slices along dimension
+`seq_dim` reversed.
+
+For example:
+
+```
+# Given this:
+batch_dim = 0
+seq_dim = 1
+input.dims = (4, 8, ...)
+seq_lengths = [7, 2, 3, 5]
+
+# then slices of input are reversed on seq_dim, but only up to seq_lengths:
+output[0, 0:7, :, ...] = input[0, 7:0:-1, :, ...]
+output[1, 0:2, :, ...] = input[1, 2:0:-1, :, ...]
+output[2, 0:3, :, ...] = input[2, 3:0:-1, :, ...]
+output[3, 0:5, :, ...] = input[3, 5:0:-1, :, ...]
+
+# while entries past seq_lens are copied through:
+output[0, 7:, :, ...] = input[0, 7:, :, ...]
+output[1, 2:, :, ...] = input[1, 2:, :, ...]
+output[2, 3:, :, ...] = input[2, 3:, :, ...]
+output[3, 5:, :, ...] = input[3, 5:, :, ...]
+```
+
+In contrast, if:
+
+```
+# Given this:
+batch_dim = 2
+seq_dim = 0
+input.dims = (8, ?, 4, ...)
+seq_lengths = [7, 2, 3, 5]
+
+# then slices of input are reversed on seq_dim, but only up to seq_lengths:
+output[0:7, :, 0, :, ...] = input[7:0:-1, :, 0, :, ...]
+output[0:2, :, 1, :, ...] = input[2:0:-1, :, 1, :, ...]
+output[0:3, :, 2, :, ...] = input[3:0:-1, :, 2, :, ...]
+output[0:5, :, 3, :, ...] = input[5:0:-1, :, 3, :, ...]
+
+# while entries past seq_lens are copied through:
+output[7:, :, 0, :, ...] = input[7:, :, 0, :, ...]
+output[2:, :, 1, :, ...] = input[2:, :, 1, :, ...]
+output[3:, :, 2, :, ...] = input[3:, :, 2, :, ...]
+output[5:, :, 3, :, ...] = input[5:, :, 3, :, ...]
+```
+
+#### Operands:
+1. `input`: tensor of tf.dtype values
+1. `seq_lengths`: tensor of 32/64-bit integer values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `seq_dim` | `IntegerAttr` | 64-bit integer attribute attribute |
+| `batch_dim` | `IntegerAttr` | 64-bit integer attribute attribute |
+| `Tlen` | `Attribute` | derived attribute attribute |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of tf.dtype values
+
+### tf.ReverseV2 (TF::ReverseV2Op)
 Reverses specific dimensions of a tensor.
 
-### Description:
+#### Description:
 
 NOTE `tf.reverse` has now changed behavior in preparation for 1.0.
 `tf.reverse_v2` is currently an alias that will be deprecated before TF 1.0.
@@ -1993,41 +3866,62 @@ reverse(t, dims) ==> [[[[8, 9, 10, 11],
                         [12, 13, 14, 15]]]]
 ```
 
-### Operands:
-1. `tensor`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 1-bit integer or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex128 type or complex64 type or TensorFlow string type values
+#### Operands:
+1. `tensor`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 1-bit integer or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow string type or TensorFlow uint16 type or TensorFlow uint8 type values
 1. `axis`: tensor of 32/64-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 | `Tidx` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `output`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 1-bit integer or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex128 type or complex64 type or TensorFlow string type values
+#### Results:
+1. `output`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 1-bit integer or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow string type or TensorFlow uint16 type or TensorFlow uint8 type values
 
-## tf.Rsqrt (TF::RsqrtOp)
-Computes reciprocal of square root of x element-wise.
+### tf.Round (TF::RoundOp)
 
-### Description:
+Rounds the values of a tensor to the nearest integer, element-wise.
+  
 
-I.e., \\(y = 1 / \sqrt{x}\\).
+#### Description:
 
-### Operands:
-1. `x`: tensor of floating-point or 64/128-bit complex type values
+Rounds half to even.  Also known as bankers rounding. If you want to round
+according to the current system rounding mode use std::rint.
 
-### Attributes:
+#### Operands:
+1. `x`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements values
+
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
+1. `y`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements values
+
+### tf.Rsqrt (TF::RsqrtOp)
+Computes reciprocal of square root of x element-wise.
+
+#### Description:
+
+I.e., \\(y = 1 / \sqrt{x}\\).
+
+#### Operands:
+1. `x`: tensor of floating-point or 64/128-bit complex type values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
 1. `y`: tensor of floating-point or 64/128-bit complex type values
 
-## tf.Select (TF::SelectOp)
+### tf.Select (TF::SelectOp)
 Selects elements from `x` or `y`, depending on `condition`.
 
-### Description:
+#### Description:
 
 The `x`, and `y` tensors must all have the same shape, and the
 output will also have that shape.
@@ -2068,23 +3962,62 @@ select(condition, t, e) ==> [[1, 2],
 
 ```
 
-### Operands:
+#### Operands:
 1. `condition`: tensor of 1-bit integer values
 1. `t`: tensor of tf.dtype values
 1. `e`: tensor of tf.dtype values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of tf.dtype values
 
-## tf.Shape (TF::ShapeOp)
+### tf.SelectV2 (TF::SelectV2Op)
+
+
+#### Description:
+
+
+#### Operands:
+1. `condition`: tensor of 1-bit integer values
+1. `t`: tensor of tf.dtype values
+1. `e`: tensor of tf.dtype values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of tf.dtype values
+
+### tf.ShapeN (TF::ShapeNOp)
+Returns shape of tensors.
+
+#### Description:
+
+This operation returns N 1-D integer tensors representing the shape of each `input[i]`.
+
+#### Operands:
+1. `input`: tensor of tf.dtype values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `N` | `IntegerAttr` | 64-bit integer attribute whose minimal value is 1 attribute |
+| `T` | `Attribute` | derived attribute attribute |
+| `out_type` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of 32/64-bit integer values
+
+### tf.Shape (TF::ShapeOp)
 Returns the shape of a tensor.
 
-### Description:
+#### Description:
 
 This operation returns a 1-D integer tensor representing the shape of `input`.
 
@@ -2095,57 +4028,65 @@ For example:
 shape(t) ==> [2, 2, 3]
 ```
 
-### Operands:
+#### Operands:
 1. `input`: tensor of tf.dtype values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 | `out_type` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of 32/64-bit integer values
 
-## tf.Sigmoid (TF::SigmoidOp)
+### tf.Sigmoid (TF::SigmoidOp)
 Computes sigmoid of `x` element-wise.
 
-### Description:
+#### Description:
 
 Specifically, `y = 1 / (1 + exp(-x))`.
 
-### Operands:
+#### Operands:
 1. `x`: tensor of floating-point or 64/128-bit complex type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `y`: tensor of floating-point or 64/128-bit complex type values
 
-## tf.Sin (TF::SinOp)
-Computes sin of x element-wise.
+### tf.Sin (TF::SinOp)
+Computes sine of x element-wise.
 
-### Description:
+#### Description:
 
+Given an input tensor, this function computes sine of every
+  element in the tensor. Input range is `(-inf, inf)` and
+  output range is `[-1,1]`.
 
-### Operands:
+  ```python
+  x = tf.constant([-float("inf"), -9, -0.5, 1, 1.2, 200, 10, float("inf")])
+  tf.math.sin(x) ==> [nan -0.4121185 -0.47942555 0.84147096 0.9320391 -0.87329733 -0.54402107 nan]
+  ```
+
+#### Operands:
 1. `x`: tensor of floating-point or 64/128-bit complex type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `y`: tensor of floating-point or 64/128-bit complex type values
 
-## tf.Slice (TF::SliceOp)
+### tf.Slice (TF::SliceOp)
 Return a slice from 'input'.
 
-### Description:
+#### Description:
 
 The output tensor is a tensor with dimensions described by 'size'
 whose values are extracted from 'input' starting at the offsets in
@@ -2154,44 +4095,83 @@ whose values are extracted from 'input' starting at the offsets in
 *Requirements*:
   0 <= begin[i] <= begin[i] + size[i] <= Di  for i in [0, n)
 
-### Operands:
+#### Operands:
 1. `input`: tensor of tf.dtype values
 1. `begin`: tensor of 32/64-bit integer values
 1. `size`: tensor of 32/64-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 | `Index` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of tf.dtype values
 
-## tf.Softmax (TF::SoftmaxOp)
+### tf.Snapshot (TF::SnapshotOp)
+Returns a copy of the input tensor.
+
+#### Description:
+
+
+#### Operands:
+1. `input`: tensor of tf.dtype values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of tf.dtype values
+
+### tf.SoftmaxCrossEntropyWithLogits (TF::SoftmaxCrossEntropyWithLogitsOp)
+
+Computes softmax cross entropy cost and gradients to backpropagate.
+  
+
+#### Description:
+
+Inputs are the logits, not probabilities.
+
+#### Operands:
+1. `features`: tensor of floating-point values
+1. `labels`: tensor of floating-point values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `loss`: tensor of floating-point values
+1. `backprop`: tensor of floating-point values
+
+### tf.Softmax (TF::SoftmaxOp)
 Computes softmax activations.
 
-### Description:
+#### Description:
 
 For each batch `i` and class `j` we have
 
     $$softmax[i, j] = exp(logits[i, j]) / sum_j(exp(logits[i, j]))$$
 
-### Operands:
+#### Operands:
 1. `logits`: tensor of floating-point values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `softmax`: tensor of floating-point values
 
-## tf.SpaceToBatchND (TF::SpaceToBatchNDOp)
+### tf.SpaceToBatchND (TF::SpaceToBatchNDOp)
 SpaceToBatch for N-D tensors of type T.
 
-### Description:
+#### Description:
 
 This operation divides "spatial" dimensions `[1, ..., M]` of the input into a
 grid of blocks of shape `block_shape`, and interleaves these blocks with the
@@ -2202,121 +4182,264 @@ batch position.  Prior to division into blocks, the spatial dimensions of the
 input are optionally zero padded according to `paddings`.  See below for a
 precise description.
 
-### Operands:
+#### Operands:
 1. `input`: tensor of tf.dtype values
 1. `block_shape`: tensor of 32/64-bit integer values
 1. `paddings`: tensor of 32/64-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 | `Tpaddings` | `Attribute` | derived attribute attribute |
 | `Tblock_shape` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of tf.dtype values
 
-## tf.Split (TF::SplitOp)
+### tf.SpaceToDepth (TF::SpaceToDepthOp)
+SpaceToDepth for tensors of type T.
+
+#### Description:
+
+Rearranges blocks of spatial data, into depth. More specifically,
+this op outputs a copy of the input tensor where values from the `height`
+and `width` dimensions are moved to the `depth` dimension.
+The attr `block_size` indicates the input block size.
+
+  * Non-overlapping blocks of size `block_size x block_size` are rearranged
+    into depth at each location.
+  * The depth of the output tensor is `block_size * block_size * input_depth`.
+  * The Y, X coordinates within each block of the input become the high order
+    component of the output channel index.
+  * The input tensor's height and width must be divisible by block_size.
+
+The `data_format` attr specifies the layout of the input and output tensors
+with the following options:
+  "NHWC": `[ batch, height, width, channels ]`
+  "NCHW": `[ batch, channels, height, width ]`
+  "NCHW_VECT_C":
+      `qint8 [ batch, channels / 4, height, width, 4 ]`
+
+It is useful to consider the operation as transforming a 6-D Tensor.
+e.g. for data_format = NHWC,
+     Each element in the input tensor can be specified via 6 coordinates,
+     ordered by decreasing memory layout significance as:
+     n,oY,bY,oX,bX,iC  (where n=batch index, oX, oY means X or Y coordinates
+                        within the output image, bX, bY means coordinates
+                        within the input block, iC means input channels).
+     The output would be a transpose to the following layout:
+     n,oY,oX,bY,bX,iC
+
+This operation is useful for resizing the activations between convolutions
+(but keeping all data), e.g. instead of pooling. It is also useful for training
+purely convolutional models.
+
+For example, given an input of shape `[1, 2, 2, 1]`, data_format = "NHWC" and
+block_size = 2:
+
+```
+x = [[[[1], [2]],
+      [[3], [4]]]]
+```
+
+This operation will output a tensor of shape `[1, 1, 1, 4]`:
+
+```
+[[[[1, 2, 3, 4]]]]
+```
+
+Here, the input has a batch of 1 and each batch element has shape `[2, 2, 1]`,
+the corresponding output will have a single element (i.e. width and height are
+both 1) and will have a depth of 4 channels (1 * block_size * block_size).
+The output element shape is `[1, 1, 4]`.
+
+For an input tensor with larger depth, here of shape `[1, 2, 2, 3]`, e.g.
+
+```
+x = [[[[1, 2, 3], [4, 5, 6]],
+      [[7, 8, 9], [10, 11, 12]]]]
+```
+
+This operation, for block_size of 2, will return the following tensor of shape
+`[1, 1, 1, 12]`
+
+```
+[[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]]
+```
+
+Similarly, for the following input of shape `[1 4 4 1]`, and a block size of 2:
+
+```
+x = [[[[1],   [2],  [5],  [6]],
+      [[3],   [4],  [7],  [8]],
+      [[9],  [10], [13],  [14]],
+      [[11], [12], [15],  [16]]]]
+```
+
+the operator will return the following tensor of shape `[1 2 2 4]`:
+
+```
+x = [[[[1, 2, 3, 4],
+       [5, 6, 7, 8]],
+      [[9, 10, 11, 12],
+       [13, 14, 15, 16]]]]
+```
+
+#### Operands:
+1. `input`: tensor of tf.dtype values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `block_size` | `IntegerAttr` | 64-bit integer attribute whose minimal value is 2 attribute |
+| `data_format` | `StringAttr` | string attribute whose value is NHWC, or NCHW, or NCHW_VECT_C attribute |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of tf.dtype values
+
+### tf.SparseToDense (TF::SparseToDenseOp)
+Converts a sparse representation into a dense tensor.
+
+#### Description:
+
+Builds an array `dense` with shape `output_shape` such that
+
+```
+# If sparse_indices is scalar
+dense[i] = (i == sparse_indices ? sparse_values : default_value)
+
+# If sparse_indices is a vector, then for each i
+dense[sparse_indices[i]] = sparse_values[i]
+
+# If sparse_indices is an n by d matrix, then for each i in [0, n)
+dense[sparse_indices[i][0], ..., sparse_indices[i][d-1]] = sparse_values[i]
+```
+
+All other values in `dense` are set to `default_value`.  If `sparse_values` is a
+scalar, all sparse indices are set to this single value.
+
+Indices should be sorted in lexicographic order, and indices must not
+contain any repeats. If `validate_indices` is true, these properties
+are checked during execution.
+
+#### Operands:
+1. `sparse_indices`: tensor of 32/64-bit integer values
+1. `output_shape`: tensor of 32/64-bit integer values
+1. `sparse_values`: tensor of tf.dtype values
+1. `default_value`: tensor of tf.dtype values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `validate_indices` | `BoolAttr` | bool attribute attribute |
+| `Tindices` | `Attribute` | derived attribute attribute |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `dense`: tensor of tf.dtype values
+
+### tf.Split (TF::SplitOp)
 Splits a tensor into `num_split` tensors along one dimension.
 
-### Description:
+#### Description:
 
 
-### Operands:
+#### Operands:
 1. `split_dim`: tensor of 32-bit integer values
 1. `value`: tensor of tf.dtype values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `num_split` | `IntegerAttr` | 64-bit integer attribute whose minimal value is 1 attribute |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of tf.dtype values
 
-## tf.SplitV (TF::SplitVOp)
+### tf.SplitV (TF::SplitVOp)
 Splits a tensor into `num_split` tensors along one dimension.
 
-### Description:
+#### Description:
 
 
-### Operands:
+#### Operands:
 1. `value`: tensor of tf.dtype values
 1. `size_splits`: tensor of 32/64-bit integer values
 1. `split_dim`: tensor of 32-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `num_split` | `IntegerAttr` | 64-bit integer attribute whose minimal value is 1 attribute |
 | `Tlen` | `Attribute` | derived attribute attribute |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of tf.dtype values
 
-## tf.Sqrt (TF::SqrtOp)
+### tf.Sqrt (TF::SqrtOp)
 Computes square root of x element-wise.
 
-### Description:
+#### Description:
 
 I.e., \\(y = \sqrt{x} = x^{1/2}\\).
 
-### Operands:
+#### Operands:
 1. `x`: tensor of floating-point or 64/128-bit complex type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `y`: tensor of floating-point or 64/128-bit complex type values
 
-## tf.Square (TF::SquareOp)
+### tf.Square (TF::SquareOp)
 Computes square of x element-wise.
 
-### Description:
+#### Description:
 
 I.e., \\(y = x * x = x^2\\).
 
-### Operands:
-1. `x`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex128 type or complex64 type values
+#### Operands:
+1. `x`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `y`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex128 type or complex64 type values
+#### Results:
+1. `y`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements values
 
-## tf.SquaredDifference (TF::SquaredDifferenceOp)
+### tf.SquaredDifference (TF::SquaredDifferenceOp)
 Returns (x - y)(x - y) element-wise.
 
-### Description:
+#### Description:
 
 *NOTE*: `SquaredDifference` supports broadcasting. More about broadcasting
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 
-### Operands:
-1. `x`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex128 type or complex64 type values
-1. `y`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex128 type or complex64 type values
+#### Operands:
+1. `x`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements values
+1. `y`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `z`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex128 type or complex64 type values
+#### Results:
+1. `z`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 32-bit integer or 64-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements values
 
-## tf.Squeeze (TF::SqueezeOp)
+### tf.Squeeze (TF::SqueezeOp)
 Removes dimensions of size 1 from the shape of a tensor.
 
-### Description:
+#### Description:
 
 Given a tensor `input`, this operation returns a tensor of the same type with
 all dimensions of size 1 removed. If you don't want to remove all size 1
@@ -2337,22 +4460,83 @@ Or, to remove specific size 1 dimensions:
 shape(squeeze(t, [2, 4])) ==> [1, 2, 3, 1]
 ```
 
-### Operands:
+#### Operands:
 1. `input`: tensor of tf.dtype values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `squeeze_dims` | `ArrayAttr` | 64-bit integer array attribute attribute |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of tf.dtype values
 
-## tf.StridedSlice (TF::StridedSliceOp)
+### tf.StatefulPartitionedCall (TF::StatefulPartitionedCallOp)
+Returns `f(inputs)`, where `f`'s body is placed and partitioned.
+
+#### Description:
+
+Asynchronously executes a function, potentially across multiple devices but
+within a single process. The kernel places and partitions a given function's
+underlying graph, and executes each of the partitioned subgraphs as a function.
+
+#### Operands:
+1. `args`: tensor of tf.dtype values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `f` | `SymbolRefAttr` | symbol reference attribute attribute |
+| `config` | `StringAttr` | string attribute attribute |
+| `config_proto` | `StringAttr` | string attribute attribute |
+| `executor_type` | `StringAttr` | string attribute attribute |
+| `Tin` | `Attribute` | derived attribute attribute |
+| `Tout` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of tf.dtype values
+
+### tf.StopGradient (TF::StopGradientOp)
+Stops gradient computation.
+
+#### Description:
+
+When executed in a graph, this op outputs its input tensor as-is.
+
+When building ops to compute gradients, this op prevents the contribution of
+its inputs from being taken into account.  Normally, the gradient generator adds
+ops to a graph to compute the derivatives of a specified 'loss' by recursively
+finding out inputs that contributed to its computation.  If you insert this op
+in the graph, its inputs are masked from the gradient generator.  They are not
+taken into account for computing gradients.
+
+This is useful any time you want to compute a value with TensorFlow but need
+to pretend that the value was a constant. Some examples include:
+
+*  The *EM* algorithm where the *M-step* should not involve backpropagation
+   through the output of the *E-step*.
+*  Contrastive divergence training of Boltzmann machines where, when
+   differentiating the energy function, the training must not backpropagate
+   through the graph that generated the samples from the model.
+*  Adversarial training, where no backprop should happen through the adversarial
+   example generation process.
+
+#### Operands:
+1. `input`: tensor of tf.dtype values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of tf.dtype values
+
+### tf.StridedSlice (TF::StridedSliceOp)
 Return a strided slice from `input`.
 
-### Description:
+#### Description:
 
 Note, most python users will want to use the Python `Tensor.__getitem__`
 or `Variable.__getitem__` rather than this op directly.
@@ -2444,13 +4628,13 @@ receive 0, 0, and 1, respectively. The appropriate bits in `begin_mask` and
   `0 != strides[i] for i in [0, m)`
   `ellipsis_mask must be a power of two (only one ellipsis)`
 
-### Operands:
+#### Operands:
 1. `input`: tensor of tf.dtype values
 1. `begin`: tensor of 32/64-bit integer values
 1. `end`: tensor of 32/64-bit integer values
 1. `strides`: tensor of 32/64-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `begin_mask` | `IntegerAttr` | 64-bit integer attribute attribute |
@@ -2461,143 +4645,269 @@ receive 0, 0, and 1, respectively. The appropriate bits in `begin_mask` and
 | `T` | `Attribute` | derived attribute attribute |
 | `Index` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of tf.dtype values
 
-## tf.Sub (TF::SubOp)
+### tf.Sub (TF::SubOp)
 Returns x - y element-wise.
 
-### Description:
+#### Description:
 
 *NOTE*: `Subtract` supports broadcasting. More about broadcasting
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 
-### Operands:
-1. `x`: tensor of number values
-1. `y`: tensor of number values
+#### Operands:
+1. `x`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow uint16 type or TensorFlow uint8 type values
+1. `y`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow uint16 type or TensorFlow uint8 type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `z`: tensor of number values
+#### Results:
+1. `z`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow uint16 type or TensorFlow uint8 type values
 
-## tf.Sum (TF::SumOp)
+### tf.Sum (TF::SumOp)
 Computes the sum of elements across dimensions of a tensor.
 
-### Description:
+#### Description:
 
 Reduces `input` along the dimensions given in `axis`. Unless
 `keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
 `axis`. If `keep_dims` is true, the reduced dimensions are
 retained with length 1.
 
-### Operands:
-1. `input`: tensor of number values
+#### Operands:
+1. `input`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow qint32 type or TensorFlow qint8 type or TensorFlow quint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or TensorFlow uint8 type values
 1. `reduction_indices`: tensor of 32/64-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `keep_dims` | `BoolAttr` | bool attribute attribute |
 | `T` | `Attribute` | derived attribute attribute |
 | `Tidx` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `output`: tensor of number values
+#### Results:
+1. `output`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow qint32 type or TensorFlow qint8 type or TensorFlow quint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or TensorFlow uint8 type values
 
-## tf.TensorListFromTensor (TF::TensorListFromTensorOp)
+### tf.TPUCompilationResult (TF::TPUCompilationResultOp)
+Returns the result of a TPU compilation.
+
+#### Description:
+
+This operation returns the result of a TPU compilation as a serialized
+CompilationResultProto, which holds a status and an error message if an error
+occurred during compilation.
+
+#### Operands:
+
+#### Attributes:
+
+#### Results:
+1. `output`: tensor of TensorFlow string type values
+
+### tf.TPUReplicateMetadata (TF::TPUReplicateMetadataOp)
+
+Metadata indicating how the TPU computation should be replicated.
+  
+
+#### Description:
+
+This operation holds the metadata common to operations of a `tpu.replicate()` computation subgraph.
+
+#### Operands:
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `num_replicas` | `IntegerAttr` | 64-bit integer attribute whose minimal value is 0 attribute |
+| `num_cores_per_replica` | `IntegerAttr` | 64-bit integer attribute attribute |
+| `topology` | `StringAttr` | string attribute attribute |
+| `use_tpu` | `BoolAttr` | bool attribute attribute |
+| `device_assignment` | `ArrayAttr` | 64-bit integer array attribute attribute |
+| `computation_shape` | `ArrayAttr` | 64-bit integer array attribute attribute |
+| `host_compute_core` | `ArrayAttr` | string array attribute attribute |
+| `padding_map` | `ArrayAttr` | string array attribute attribute |
+| `step_marker_location` | `StringAttr` | string attribute attribute |
+| `allow_soft_placement` | `BoolAttr` | bool attribute attribute |
+
+#### Results:
+
+### tf.Tanh (TF::TanhOp)
+Computes hyperbolic tangent of `x` element-wise.
+
+#### Description:
+
+Given an input tensor, this function computes hyperbolic tangent of every
+  element in the tensor. Input range is `[-inf, inf]` and
+  output range is `[-1,1]`.
+
+  ```python
+  x = tf.constant([-float("inf"), -5, -0.5, 1, 1.2, 2, 3, float("inf")])
+  tf.math.tanh(x) ==> [-1. -0.99990916 -0.46211717 0.7615942 0.8336547 0.9640276 0.9950547 1.]
+  ```
+
+#### Operands:
+1. `x`: tensor of floating-point or 64/128-bit complex type values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `y`: tensor of floating-point or 64/128-bit complex type values
+
+### tf.TensorListFromTensor (TF::TensorListFromTensorOp)
 
 Creates a TensorList which, when stacked, has the value of `tensor`.
   
 
-### Description:
+#### Description:
 
 Each tensor in the result list corresponds to one row of the input tensor.
 
 tensor: The input tensor.
 output_handle: The list.
 
-### Operands:
+#### Operands:
 1. `tensor`: tensor of tf.dtype values
 1. `element_shape`: tensor of 32/64-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `shape_type` | `Attribute` | derived attribute attribute |
 | `element_dtype` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output_handle`: tensor of TensorFlow variant type values
 
-## tf.TensorListGetItem (TF::TensorListGetItemOp)
+### tf.TensorListGetItem (TF::TensorListGetItemOp)
 
 
-### Description:
+#### Description:
 
 
-### Operands:
+#### Operands:
 1. `input_handle`: tensor of TensorFlow variant type values
 1. `index`: tensor of 32-bit integer values
 1. `element_shape`: tensor of 32-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `element_dtype` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `item`: tensor of tf.dtype values
 
-## tf.TensorListReserve (TF::TensorListReserveOp)
+### tf.TensorListLength (TF::TensorListLengthOp)
+Returns the number of tensors in the input tensor list.
+
+#### Description:
+
+input_handle: the input list
+length: the number of tensors in the list
+
+#### Operands:
+1. `input_handle`: tensor of TensorFlow variant type values
+
+#### Attributes:
+
+#### Results:
+1. `length`: tensor of 32-bit integer values
+
+### tf.TensorListPushBack (TF::TensorListPushBackOp)
+
+Returns a list which has the passed-in `Tensor` as last element and the other elements of the given list in `input_handle`.
+  
+
+#### Description:
+
+tensor: The tensor to put on the list.
+input_handle: The old list.
+output_handle: A list with the elements of the old list followed by tensor.
+element_dtype: the type of elements in the list.
+element_shape: a shape compatible with that of elements in the list.
+
+#### Operands:
+1. `input_handle`: tensor of TensorFlow variant type values
+1. `tensor`: tensor of tf.dtype values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `element_dtype` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output_handle`: tensor of TensorFlow variant type values
+
+### tf.TensorListReserve (TF::TensorListReserveOp)
 List of the given size with empty elements.
 
-### Description:
+#### Description:
 
 element_shape: the shape of the future elements of the list
 num_elements: the number of elements to reserve
 handle: the output list
 element_dtype: the desired type of elements in the list.
 
-### Operands:
+#### Operands:
 1. `element_shape`: tensor of 32/64-bit integer values
 1. `num_elements`: tensor of 32-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
-| `element_dtype` | `TypeAttr` | any type attribute attribute |
 | `shape_type` | `Attribute` | derived attribute attribute |
+| `element_dtype` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `handle`: tensor of TensorFlow variant type values
 
-## tf.TensorListSetItem (TF::TensorListSetItemOp)
+### tf.TensorListResize (TF::TensorListResizeOp)
+Resizes the list.
+
+#### Description:
+
+input_handle: the input list
+size: size of the output list
+
+#### Operands:
+1. `input_handle`: tensor of TensorFlow variant type values
+1. `size`: tensor of 32-bit integer values
+
+#### Attributes:
+
+#### Results:
+1. `output_handle`: tensor of TensorFlow variant type values
+
+### tf.TensorListSetItem (TF::TensorListSetItemOp)
 
 
-### Description:
+#### Description:
 
 
-### Operands:
+#### Operands:
 1. `input_handle`: tensor of TensorFlow variant type values
 1. `index`: tensor of 32-bit integer values
 1. `item`: tensor of tf.dtype values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `element_dtype` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output_handle`: tensor of TensorFlow variant type values
 
-## tf.TensorListStack (TF::TensorListStackOp)
+### tf.TensorListStack (TF::TensorListStackOp)
 Stacks all tensors in the list.
 
-### Description:
+#### Description:
 
 Requires that all tensors have the same shape.
 
@@ -2605,25 +4915,70 @@ input_handle: the input list
 tensor: the gathered result
 num_elements: optional. If not -1, the number of elements in the list.
 
-### Operands:
+#### Operands:
 1. `input_handle`: tensor of TensorFlow variant type values
 1. `element_shape`: tensor of 32-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `num_elements` | `IntegerAttr` | 64-bit integer attribute attribute |
 | `element_dtype` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `tensor`: tensor of tf.dtype values
 
-## tf.TopKV2 (TF::TopKV2Op)
+### tf.Tile (TF::TileOp)
+Constructs a tensor by tiling a given tensor.
+
+#### Description:
+
+This operation creates a new tensor by replicating `input` `multiples` times.
+The output tensor's i'th dimension has `input.dims(i) * multiples[i]` elements,
+and the values of `input` are replicated `multiples[i]` times along the 'i'th
+dimension. For example, tiling `[a b c d]` by `[2]` produces
+`[a b c d a b c d]`.
+
+    >>> a = tf.constant([[1,2,3],[4,5,6]], tf.int32)
+    >>> b = tf.constant([1,2], tf.int32)
+    >>> tf.tile(a, b)
+    <tf.Tensor: shape=(2, 6), dtype=int32, numpy=
+    array([[1, 2, 3, 1, 2, 3],
+           [4, 5, 6, 4, 5, 6]], dtype=int32)>
+    >>> c = tf.constant([2,1], tf.int32)
+    >>> tf.tile(a, c)
+    <tf.Tensor: shape=(4, 3), dtype=int32, numpy=
+    array([[1, 2, 3],
+           [4, 5, 6],
+           [1, 2, 3],
+           [4, 5, 6]], dtype=int32)>
+    >>> d = tf.constant([2,2], tf.int32)
+    >>> tf.tile(a, d)
+    <tf.Tensor: shape=(4, 6), dtype=int32, numpy=
+    array([[1, 2, 3, 1, 2, 3],
+           [4, 5, 6, 4, 5, 6],
+           [1, 2, 3, 1, 2, 3],
+           [4, 5, 6, 4, 5, 6]], dtype=int32)>
+
+#### Operands:
+1. `input`: tensor of tf.dtype values
+1. `multiples`: tensor of 32/64-bit integer values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `Tmultiples` | `Attribute` | derived attribute attribute |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of tf.dtype values
+
+### tf.TopKV2 (TF::TopKV2Op)
 
 Finds values and indices of the `k` largest elements for the last dimension.
   
 
-### Description:
+#### Description:
 
 If the input is a vector (rank-1), finds the `k` largest entries in the vector
 and outputs their values and indices as vectors.  Thus `values[j]` is the
@@ -2636,45 +4991,45 @@ row (resp. vector along the last dimension).  Thus,
 
 If two elements are equal, the lower-index element appears first.
 
-### Operands:
-1. `input`: tensor of 8/16/32/64-bit integer or floating-point values
+#### Operands:
+1. `input`: tensor of 8/16/32/64-bit integer or TensorFlow uint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or floating-point values
 1. `k`: tensor of 32-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `sorted` | `BoolAttr` | bool attribute attribute |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `values`: tensor of 8/16/32/64-bit integer or floating-point values
+#### Results:
+1. `values`: tensor of 8/16/32/64-bit integer or TensorFlow uint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or floating-point values
 1. `indices`: tensor of 32-bit integer values
 
-## tf.Transpose (TF::TransposeOp)
+### tf.Transpose (TF::TransposeOp)
 Shuffle dimensions of x according to a permutation.
 
-### Description:
+#### Description:
 
 The output `y` has the same rank as `x`. The shapes of `x` and `y` satisfy:
   `y.shape[i] == x.shape[perm[i]] for i in [0, 1, ..., rank(x) - 1]`
 
-### Operands:
+#### Operands:
 1. `x`: tensor of tf.dtype values
 1. `perm`: tensor of 32/64-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 | `Tperm` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `y`: tensor of tf.dtype values
 
-## tf.TruncateDiv (TF::TruncateDivOp)
+### tf.TruncateDiv (TF::TruncateDivOp)
 Returns x / y element-wise for integer types.
 
-### Description:
+#### Description:
 
 Truncation designates that negative numbers will round fractional quantities
 toward zero. I.e. -7 / 5 = -1. This matches C semantics but it is different
@@ -2684,24 +5039,65 @@ Python Semantics.
 *NOTE*: `TruncateDiv` supports broadcasting. More about broadcasting
 [here](http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html)
 
-### Operands:
-1. `x`: tensor of number values
-1. `y`: tensor of number values
+#### Operands:
+1. `x`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow uint16 type or TensorFlow uint8 type values
+1. `y`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow uint16 type or TensorFlow uint8 type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `z`: tensor of number values
+#### Results:
+1. `z`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow uint16 type or TensorFlow uint8 type values
 
-## tf.Unpack (TF::UnpackOp)
+### tf.Unique (TF::UniqueOp)
+Finds unique elements in a 1-D tensor.
+
+#### Description:
+
+This operation returns a tensor `y` containing all of the unique elements of `x`
+sorted in the same order that they occur in `x`; `x` does not need to be sorted.
+This operation also returns a tensor `idx` the same size as `x` that contains 
+the index of each value of `x` in the unique output `y`. In other words:
+
+`y[idx[i]] = x[i] for i in [0, 1,...,size(x) - 1]`
+
+Examples:
+
+```
+# tensor 'x' is [1, 1, 2, 4, 4, 4, 7, 8, 8]
+y, idx = unique(x)
+y ==> [1, 2, 4, 7, 8]
+idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4]
+```
+
+```
+# tensor 'x' is [4, 5, 1, 2, 3, 3, 4, 5]
+y, idx = unique(x)
+y ==> [4, 5, 1, 2, 3]
+idx ==> [0, 1, 2, 3, 4, 4, 0, 1]
+```
+
+#### Operands:
+1. `x`: tensor of tf.dtype values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `T` | `Attribute` | derived attribute attribute |
+| `out_idx` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `y`: tensor of tf.dtype values
+1. `idx`: tensor of 32/64-bit integer values
+
+### tf.Unpack (TF::UnpackOp)
 
 Unpacks a given dimension of a rank-`R` tensor into `num` rank-`(R-1)` tensors.
   
 
-### Description:
+#### Description:
 
 Unpacks `num` tensors from `value` by chipping it along the `axis` dimension.
 For example, given a tensor of shape `(A, B, C, D)`;
@@ -2716,51 +5112,190 @@ Etc.
 
 This is the opposite of `pack`.
 
-### Operands:
+#### Operands:
 1. `value`: tensor of tf.dtype values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `num` | `IntegerAttr` | 64-bit integer attribute whose minimal value is 0 attribute |
 | `axis` | `IntegerAttr` | 64-bit integer attribute attribute |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of tf.dtype values
 
-## tf.Xdivy (TF::XdivyOp)
+### tf.VariableShape (TF::VariableShapeOp)
+Returns the shape of the variable pointed to by `resource`.
+
+#### Description:
+
+This operation returns a 1-D integer tensor representing the shape of `input`.
+
+For example:
+
+```
+# 't' is [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]
+shape(t) ==> [2, 2, 3]
+```
+
+#### Operands:
+1. `input`: tensor of TensorFlow resource type values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `out_type` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of 32/64-bit integer values
+
+### tf.Where (TF::WhereOp)
+Returns locations of nonzero / true values in a tensor.
+
+#### Description:
+
+This operation returns the coordinates of true elements in `condition`. The
+coordinates are returned in a 2-D tensor where the first dimension (rows)
+represents the number of true elements, and the second dimension (columns)
+represents the coordinates of the true elements. Keep in mind, the shape of
+the output tensor can vary depending on how many true values there are in
+`condition`. Indices are output in row-major order.
+
+For example:
+
+```
+# 'input' tensor is [[True, False]
+#                    [True, False]]
+# 'input' has two true values, so output has two coordinates.
+# 'input' has rank of 2, so coordinates have two indices.
+where(input) ==> [[0, 0],
+                  [1, 0]]
+
+# `condition` tensor is [[[True, False]
+#                     [True, False]]
+#                    [[False, True]
+#                     [False, True]]
+#                    [[False, False]
+#                     [False, True]]]
+# 'input' has 5 true values, so output has 5 coordinates.
+# 'input' has rank of 3, so coordinates have three indices.
+where(input) ==> [[0, 0, 0],
+                  [0, 1, 0],
+                  [1, 0, 1],
+                  [1, 1, 1],
+                  [2, 1, 1]]
+
+# `condition` tensor is [[[1.5,  0.0]
+#                     [-0.5, 0.0]]
+#                    [[0.0,  0.25]
+#                     [0.0,  0.75]]
+#                    [[0.0,  0.0]
+#                     [0.0,  0.01]]]
+# 'input' has 5 nonzero values, so output has 5 coordinates.
+# 'input' has rank of 3, so coordinates have three indices.
+where(input) ==> [[0, 0, 0],
+                  [0, 1, 0],
+                  [1, 0, 1],
+                  [1, 1, 1],
+                  [2, 1, 1]]
+
+# `condition` tensor is [[[1.5 + 0.0j, 0.0  + 0.0j]
+#                     [0.0 + 0.5j, 0.0  + 0.0j]]
+#                    [[0.0 + 0.0j, 0.25 + 1.5j]
+#                     [0.0 + 0.0j, 0.75 + 0.0j]]
+#                    [[0.0 + 0.0j, 0.0  + 0.0j]
+#                     [0.0 + 0.0j, 0.01 + 0.0j]]]
+# 'input' has 5 nonzero magnitude values, so output has 5 coordinates.
+# 'input' has rank of 3, so coordinates have three indices.
+where(input) ==> [[0, 0, 0],
+                  [0, 1, 0],
+                  [1, 0, 1],
+                  [1, 1, 1],
+                  [2, 1, 1]]
+```
+
+#### Operands:
+1. `input`: tensor of bfloat16 type or 16-bit float or 32-bit float or 64-bit float or 1-bit integer or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer or complex type with 64-bit float elements or complex type with 32-bit float elements or TensorFlow qint32 type or TensorFlow qint8 type or TensorFlow quint8 type or TensorFlow uint16 type or TensorFlow uint32 type or TensorFlow uint64 type or TensorFlow uint8 type values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `index`: tensor of 64-bit integer values
+
+### tf.While (TF::WhileOp)
+
+output = input; While (Cond(output)) { output = Body(output) }
+  
+
+#### Description:
+
+output = input; While (Cond(output)) { output = Body(output) }
+
+input: A list of input tensors whose types are T.
+output: A list of output tensors whose types are T.
+cond: A function that takes 'input' and returns a tensor.  If the tensor is
+    a scalar of non-boolean, the scalar is converted to a boolean
+    according to the following rule: if the scalar is a numerical
+    value, non-zero means True and zero means False; if the scalar is
+    a string, non-empty means True and empty means False. If the
+    tensor is not a scalar, non-emptiness means True and False
+    otherwise.
+body: A function that takes a list of tensors and returns another
+      list of tensors. Both lists have the same types as specified
+      by T.
+
+#### Operands:
+1. `input`: tensor of tf.dtype values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `cond` | `SymbolRefAttr` | symbol reference attribute attribute |
+| `body` | `SymbolRefAttr` | symbol reference attribute attribute |
+| `output_shapes` | `ArrayAttr` | string array attribute attribute |
+| `parallel_iterations` | `IntegerAttr` | 64-bit integer attribute attribute |
+| `is_stateless` | `BoolAttr` | bool attribute attribute |
+| `T` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of tf.dtype values
+
+### tf.Xdivy (TF::XdivyOp)
 Returns 0 if x == 0, and x / y otherwise, elementwise.
 
-### Description:
+#### Description:
 
 
-### Operands:
-1. `x`: tensor of 16-bit float or 32-bit float or 64-bit float or complex128 type or complex64 type values
-1. `y`: tensor of 16-bit float or 32-bit float or 64-bit float or complex128 type or complex64 type values
+#### Operands:
+1. `x`: tensor of 16-bit float or 32-bit float or 64-bit float or complex type with 64-bit float elements or complex type with 32-bit float elements values
+1. `y`: tensor of 16-bit float or 32-bit float or 64-bit float or complex type with 64-bit float elements or complex type with 32-bit float elements values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
-1. `z`: tensor of 16-bit float or 32-bit float or 64-bit float or complex128 type or complex64 type values
+#### Results:
+1. `z`: tensor of 16-bit float or 32-bit float or 64-bit float or complex type with 64-bit float elements or complex type with 32-bit float elements values
 
-## tf.ZerosLike (TF::ZerosLikeOp)
+### tf.ZerosLike (TF::ZerosLikeOp)
 Returns a tensor of zeros with the same shape and type as x.
 
-### Description:
+#### Description:
 
 
-### Operands:
+#### Operands:
 1. `x`: tensor of tf.dtype values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `T` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `y`: tensor of tf.dtype values
 
diff --git a/tensorflow/compiler/mlir/g3doc/tfl_ops.md b/tensorflow/compiler/mlir/g3doc/tfl_ops.md
index d2ae435cf2e..b31c35d76a5 100644
--- a/tensorflow/compiler/mlir/g3doc/tfl_ops.md
+++ b/tensorflow/compiler/mlir/g3doc/tfl_ops.md
@@ -1,67 +1,121 @@
 <!-- Autogenerated by mlir-tblgen; don't manually edit -->
-# Operation definition
-## tfl.abs (TFL::AbsOp)
+# Dialect 'tfl' definition
+
+The TensorFlow Lite dialect.
+
+This dialect maps to TensorFlow Lite operations.
+
+Invariants:
+
+* All values are of Tensor type (in particular, scalars are
+  represented using zero-dimensional tensors);
+
+[TOC]
+
+## Operation definition
+### tfl.abs (TFL::AbsOp)
 Absolute value operator
 
-### Description:
+#### Description:
 
 Given a tensor `x`, this operation returns a tensor containing the absolute
 value of each element in `x`. For example, if x is an input element and y is
 an output element, this operation computes \\(y = |x|\\).
 
-### Operands:
+#### Operands:
 1. `x`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `y`: tensor of any type values
 
-## tfl.add_n (TFL::AddNOp)
+### tfl.add_n (TFL::AddNOp)
 add_n operator
 
-### Description:
+#### Description:
 
 Adds all input tensors element-wise.
 
-### Operands:
-1. `inputs`: tensor of 32-bit float or 32-bit integer values
+#### Operands:
+1. `inputs`: tensor of 32-bit float or 32-bit integer or QI16 type or QUI16 type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
-1. `sum`: tensor of 32-bit float or 32-bit integer values
+#### Results:
+1. `sum`: tensor of 32-bit float or 32-bit integer or QI16 type or QUI16 type values
 
-## tfl.add (TFL::AddOp)
+### tfl.add (TFL::AddOp)
 Addition operator
 
-### Description:
+#### Description:
 
 Element-wise addition operation.
 
-### Operands:
+#### Operands:
 1. `lhs`: tensor of any type values
 1. `rhs`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `fused_activation_function` | `StringAttr` | fused activation enum attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.average_pool_2d (TFL::AveragePool2DOp)
+### tfl.arg_max (TFL::ArgMaxOp)
+ArgMax operator
+
+#### Description:
+
+Returns the index with the largest value across dimensions of a tensor.
+
+#### Operands:
+1. `input`: tensor of 32-bit float or 32-bit integer or 8-bit integer values
+1. `dim`: tensor of 32/64-bit integer values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `output_type` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of 32/64-bit integer values
+
+### tfl.arg_min (TFL::ArgMinOp)
+ArgMin operator
+
+#### Description:
+
+Returns the index with the smallest value across dimensions of a tensor.
+  a = [1, 10, 26.9, 2.8, 166.32, 62.3]
+  b = tf.math.argmin(input = a)
+  c = tf.keras.backend.eval(b)
+
+#### Operands:
+1. `input`: tensor of 32-bit float or 32-bit integer or 8-bit integer values
+1. `dim`: tensor of 32/64-bit integer values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `output_type` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of 32/64-bit integer values
+
+### tfl.average_pool_2d (TFL::AveragePool2DOp)
 Average_pool_2d operator
 
-### Description:
+#### Description:
 
 Performs average-pooling operation on input.
 
-### Operands:
+#### Operands:
 1. `input`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `filter_height` | `IntegerAttr` | 32-bit integer attribute attribute |
@@ -71,64 +125,107 @@ Performs average-pooling operation on input.
 | `stride_w` | `IntegerAttr` | 32-bit integer attribute attribute |
 | `fused_activation_function` | `StringAttr` | fused activation enum attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.batch_to_space_nd (TFL::BatchToSpaceNdOp)
+### tfl.basic_lstm (TFL::BasicLSTMOp)
+The basic lstm operator
+
+#### Description:
+
+basic LSTM Cell Operator.
+
+#### Operands:
+1. `data_input`: tensor of 32-bit float or 8-bit integer or QI8 type or QUI8 type or QI16 type or QUI16 type values
+1. `prev_activ_input`: tensor of 32-bit float or 8-bit integer or QI8 type or QUI8 type or QI16 type or QUI16 type values
+1. `weights_input`: tensor of 32-bit float or 8-bit integer or QI8 type or QUI8 type or QI16 type or QUI16 type values
+1. `biases_input`: tensor of 32-bit float or QI32 type or QUI32 type values
+1. `prev_state_input`: tensor of 32-bit float or 8-bit integer or QI8 type or QUI8 type or QI16 type or QUI16 type values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `fused_activation_function` | `StringAttr` | fused activation enum attribute |
+| `cell_clip` | `FloatAttr` | 32-bit float attribute attribute |
+| `proj_clip` | `FloatAttr` | 32-bit float attribute attribute |
+| `kernel_type` | `StringAttr` | lstm kernel type enum case BASIC attribute |
+
+#### Results:
+1. `activ_output`: 2D tensor of 32-bit float or 8-bit integer or QI8 type or QUI8 type or QI16 type or QUI16 type values
+1. `state_output`: 2D tensor of 32-bit float or 8-bit integer or QI8 type or QUI8 type or QI16 type or QUI16 type values
+1. `concat_temp`: 2D tensor of 32-bit float or 8-bit integer or QI8 type or QUI8 type or QI16 type or QUI16 type values
+1. `activ_temp`: 2D tensor of 32-bit float or 8-bit integer or QI8 type or QUI8 type or QI16 type or QUI16 type values
+
+### tfl.batch_to_space_nd (TFL::BatchToSpaceNdOp)
 BatchToSpaceNd operator
 
-### Description:
+#### Description:
 
 This operation reshapes the "batch" dimension 0 into space dimensions.
 
-### Operands:
-1. `input`: tensor of 32-bit float or 8-bit integer or 32-bit integer or 64-bit integer values
+#### Operands:
+1. `input`: tensor of 32-bit float or 8-bit integer or 32-bit integer or 64-bit integer or QI8 type or QUI8 type values
 1. `block_shape`: tensor of 32-bit integer values
 1. `indices`: tensor of 32-bit integer values
 
-### Attributes:
+#### Attributes:
 
-### Results:
-1. `output`: tensor of 32-bit float or 16-bit integer or 32-bit integer or 64-bit integer values
+#### Results:
+1. `output`: tensor of 32-bit float or 16-bit integer or 32-bit integer or 64-bit integer or QI8 type or QUI8 type values
 
-## tfl.ceil (TFL::CeilOp)
+### tfl.cast (TFL::CastOp)
+Cast operator
+
+#### Description:
+
+Casts input from input type to output type.
+
+#### Operands:
+1. `input`: tensor of 32-bit float or 1-bit integer or 32-bit integer or 64-bit integer values
+
+#### Attributes:
+
+#### Results:
+1. `output`: tensor of 32-bit float or 1-bit integer or 32-bit integer or 64-bit integer values
+
+### tfl.ceil (TFL::CeilOp)
 Ceil operator
 
-### Description:
+#### Description:
 
 Returns element-wise ceil value of the input.
 
-### Operands:
+#### Operands:
 1. `x`: tensor of floating-point values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `y`: tensor of floating-point values
 
-## tfl.concatenation (TFL::ConcatenationOp)
+### tfl.concatenation (TFL::ConcatenationOp)
 Concatenation operator
 
-### Description:
+#### Description:
 
 Concatenates tensors along one dimension
 
-### Operands:
-1. `values`: tensor of 32-bit float or 64-bit integer or 32-bit integer or 16-bit integer or 8-bit integer or quantized type with 8 bits storage type values
+#### Operands:
+1. `values`: tensor of 32-bit float or 64-bit integer or 32-bit integer or 16-bit integer or 8-bit integer or QI8 type or QUI8 type or TFLite uint8 type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `axis` | `IntegerAttr` | 32-bit integer attribute attribute |
 | `fused_activation_function` | `StringAttr` | fused activation enum attribute |
 
-### Results:
-1. `output`: tensor of 32-bit float or 64-bit integer or 32-bit integer or 16-bit integer or 8-bit integer or quantized type with 8 bits storage type values
+#### Results:
+1. `output`: tensor of 32-bit float or 64-bit integer or 32-bit integer or 16-bit integer or 8-bit integer or QI8 type or QUI8 type or TFLite uint8 type values
 
-## tfl.pseudo_const (TFL::ConstOp)
+### tfl.pseudo_const (TFL::ConstOp)
 Constant pseudo op.
 
-### Description:
+#### Description:
 
 Represents a constant value in TensorFlow Lite dialect. This is not an
 actual operation and it will be lowered to buffer instead.
@@ -136,20 +233,20 @@ actual operation and it will be lowered to buffer instead.
 The op is allowed to have all the same type of attributes as tf.Const does
 (e.g., opaque TF attributes are allowed).
 
-### Operands:
+#### Operands:
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `value` | `ElementsAttr` | constant vector/tensor attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.conv_2d (TFL::Conv2DOp)
+### tfl.conv_2d (TFL::Conv2DOp)
 Convolution operator
 
-### Description:
+#### Description:
 
 Performs convolution operation on inputs.
 
@@ -158,12 +255,12 @@ Inputs:
   `inputs[1]`: required: the filter weight tensor
   `inputs[2]`: optional: the bias tensor
 
-### Operands:
+#### Operands:
 1. `input`: tensor of any type values
 1. `filter`: tensor of any type values
-1. `bias`: tensor of any type values
+1. `bias`: tensor of any type values or none type
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `dilation_h_factor` | `IntegerAttr` | 32-bit integer attribute attribute |
@@ -173,28 +270,51 @@ Inputs:
 | `stride_h` | `IntegerAttr` | 32-bit integer attribute attribute |
 | `stride_w` | `IntegerAttr` | 32-bit integer attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.cos (TFL::CosOp)
+### tfl.cos (TFL::CosOp)
 Cosine operator
 
-### Description:
+#### Description:
 
 Computes element-wise Cosine of input
 
-### Operands:
+#### Operands:
 1. `x`: tensor of floating-point values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `y`: tensor of floating-point values
 
-## tfl.depthwise_conv_2d (TFL::DepthwiseConv2DOp)
+### tfl.depth_to_space (TFL::DepthToSpaceOp)
+DepthToSpace operator
+
+#### Description:
+
+Rearranges data from depth into blocks of spatial data.
+This is the reverse transformation of SpaceToDepth. More specifically,
+this op outputs a copy of the input tensor where values from the `depth`
+dimension are moved in spatial blocks to the `height` and `width`
+dimensions. The attr `block_size` indicates the input block size and how
+the data is moved.
+
+#### Operands:
+1. `input`: tensor of 32-bit float or 8-bit integer or 32-bit integer or 64-bit integer or TFLite uint8 type or QUI8 type values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `block_size` | `IntegerAttr` | 32-bit integer attribute attribute |
+
+#### Results:
+1. `output`: tensor of 32-bit float or 8-bit integer or 32-bit integer or 64-bit integer or TFLite uint8 type or QUI8 type values
+
+### tfl.depthwise_conv_2d (TFL::DepthwiseConv2DOp)
 Depthwise-separable convolution operator
 
-### Description:
+#### Description:
 
 Performs convolution operation on inputs.
 
@@ -203,12 +323,12 @@ Inputs:
   `inputs[1]`: required: the filter weight tensor
   `inputs[2]`: optional: the bias tensor
 
-### Operands:
+#### Operands:
 1. `input`: tensor of any type values
 1. `filter`: tensor of any type values
-1. `bias`: tensor of any type values
+1. `bias`: tensor of any type values or none type
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `dilation_h_factor` | `IntegerAttr` | 32-bit integer attribute attribute |
@@ -219,96 +339,112 @@ Inputs:
 | `stride_w` | `IntegerAttr` | 32-bit integer attribute attribute |
 | `depth_multiplier` | `IntegerAttr` | 32-bit integer attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.dequantize (TFL::DequantizeOp)
+### tfl.dequantize (TFL::DequantizeOp)
 Dequantize operator
 
-### Description:
+#### Description:
 
 Converts quantized array of integers to floating-points according to the
 quantization parameters.
 
-### Operands:
+#### Operands:
 1. `input`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.div (TFL::DivOp)
+### tfl.div (TFL::DivOp)
 Division operator
 
-### Description:
+#### Description:
 
 Element-wise division operation.
 
-### Operands:
+#### Operands:
 1. `lhs`: tensor of any type values
 1. `rhs`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `fused_activation_function` | `StringAttr` | fused activation enum attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.elu (TFL::EluOp)
+### tfl.elu (TFL::EluOp)
 Exponential Linear Unit operator
 
-### Description:
+#### Description:
 
 Computes the exponential linear
   f(x) -> exp(x) - 1 for x < 0, x for x >= 0.
 element-wise.
 
-### Operands:
+#### Operands:
 1. `x`: tensor of floating-point values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `y`: tensor of any type values
 
-## tfl.equal (TFL::EqualOp)
+### tfl.embedding_lookup (TFL::EmbeddingLookupOp)
+Embedding lookup operator
+
+#### Description:
+
+Looks up ids in a list of embedding tensors.
+
+#### Operands:
+1. `lookup`: tensor of 32-bit integer values
+1. `value`: tensor of 32-bit float or 8-bit integer or TFLite uint8 type values
+
+#### Attributes:
+
+#### Results:
+1. `output`: tensor of 32-bit float or 8-bit integer or TFLite uint8 type values
+
+### tfl.equal (TFL::EqualOp)
 Equal operator
 
-### Description:
+#### Description:
 
 Returns the truth element of x == y element-wise
 
-### Operands:
-1. `x`: tensor of 1-bit integer or 32-bit float or 32-bit integer or 64-bit integer or 8-bit integer values
-1. `y`: tensor of 1-bit integer or 32-bit float or 32-bit integer or 64-bit integer or 8-bit integer values
+#### Operands:
+1. `x`: tensor of 1-bit integer or 32-bit float or 32-bit integer or 64-bit integer or 8-bit integer or QI8 type or QUI8 type or TFLite uint8 type values
+1. `y`: tensor of 1-bit integer or 32-bit float or 32-bit integer or 64-bit integer or 8-bit integer or QI8 type or QUI8 type or TFLite uint8 type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `output`: tensor of 1-bit integer values
 
-## tfl.exp (TFL::ExpOp)
+### tfl.exp (TFL::ExpOp)
 Natural exponentiation operator
 
-### Description:
+#### Description:
 
 Performs element-wise natural exponentiation operation on input.
 
-### Operands:
-1. `x`: tensor of any type values
+#### Operands:
+1. `x`: tensor of floating-point values
 
-### Attributes:
+#### Attributes:
 
-### Results:
-1. `y`: tensor of any type values
+#### Results:
+1. `y`: tensor of floating-point values
 
-## tfl.expand_dims (TFL::ExpandDimsOp)
+### tfl.expand_dims (TFL::ExpandDimsOp)
 Inserts a dimension of 1 into a tensor's shape.
 
-### Description:
+#### Description:
 
 Given a tensor `input`, this operation inserts a dimension of 1 at the
 dimension index `axis` of `input`'s shape. The dimension index `axis` starts at
@@ -341,347 +477,508 @@ This operation requires that:
 This operation is related to `squeeze()`, which removes dimensions of
 size 1.
 
-### Operands:
+#### Operands:
 1. `input`: tensor of any type values
 1. `dim`: tensor of any integer type
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.fake_quant (TFL::FakeQuantOp)
+### tfl.fake_quant (TFL::FakeQuantOp)
 FakeQuant operator
 
-### Description:
+#### Description:
 
 Fake-quantize the 'inputs' tensor of type float via float scalars min and
 max to 'outputs' tensor of same shape as inputs.
 
-### Operands:
+#### Operands:
 1. `input`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `minmax` | `ArrayAttr` | min-max range pair attribute |
 | `num_bits` | `IntegerAttr` | 32-bit integer attribute attribute |
 | `narrow_range` | `BoolAttr` | bool attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.fill (TFL::FillOp)
+### tfl.fill (TFL::FillOp)
 Fill the tensor with given value.
 
-### Description:
+#### Description:
 
 Fill the tensor with given value.
 
-### Operands:
+#### Operands:
 1. `dims`: tensor of 32/64-bit integer values
 1. `value`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `res`: tensor of any type values
 
-## tfl.floor_div (TFL::FloorDivOp)
+### tfl.floor_div (TFL::FloorDivOp)
 Floor div operator
 
-### Description:
+#### Description:
 
 Element-wise floor div operation.
 
-### Operands:
+#### Operands:
 1. `lhs`: tensor of any type values
 1. `rhs`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.floor_mod (TFL::FloorModOp)
+### tfl.floor_mod (TFL::FloorModOp)
 Division reminder
 
-### Description:
+#### Description:
 
 Element-wise division reminder operation.
 
-### Operands:
-1. `lhs`: tensor of any type values
-1. `rhs`: tensor of any type values
+#### Operands:
+1. `lhs`: tensor of 32-bit integer or 64-bit integer or 32-bit float values
+1. `rhs`: tensor of 32-bit integer or 64-bit integer or 32-bit float values
 
-### Attributes:
+#### Attributes:
 
-### Results:
-1. `output`: tensor of any type values
+#### Results:
+1. `output`: tensor of 32-bit integer or 64-bit integer or 32-bit float values
 
-## tfl.floor (TFL::FloorOp)
+### tfl.floor (TFL::FloorOp)
 Floor operator
 
-### Description:
+#### Description:
 
 Returns element-wise floor value of the input.
 
-### Operands:
+#### Operands:
 1. `x`: tensor of floating-point values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `y`: tensor of floating-point values
 
-## tfl.fully_connected (TFL::FullyConnectedOp)
+### tfl.fully_connected (TFL::FullyConnectedOp)
 Fully connected op
 
-### Description:
+#### Description:
 
 
-### Operands:
-1. `input`: tensor of 32-bit float values
-1. `filter`: tensor of 32-bit float values
-1. `bias`: tensor of 32-bit float values or none type
+#### Operands:
+1. `input`: tensor of 32-bit float or QI8 type or QUI8 type or QI16 type or QUI16 type values
+1. `filter`: tensor of 32-bit float or QI8 type or QUI8 type or QI16 type or QUI16 type values
+1. `bias`: tensor of 32-bit float or QI32 type or QUI32 type values or none type
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `fused_activation_function` | `StringAttr` | fused activation enum attribute |
 | `weights_format` | `StringAttr` | fully connected options weights format attribute |
 | `keep_num_dims` | `BoolAttr` | bool attribute attribute |
 
-### Results:
-1. `output`: tensor of 32-bit float values
+#### Results:
+1. `output`: tensor of 32-bit float or QI8 type or QUI8 type or QI16 type or QUI16 type values
 
-## tfl.gather (TFL::GatherOp)
+### tfl.gather_nd (TFL::GatherNdOp)
+Gather_nd operator
+
+#### Description:
+
+Gather slices from `params` into a Tensor with shape specified by `indices`.
+
+#### Operands:
+1. `params`: tensor of 32-bit float or 8-bit integer or 64-bit integer or 32-bit integer or TFLite uint8 type values
+1. `indices`: tensor of 32/64-bit integer values
+
+#### Attributes:
+
+#### Results:
+1. `output`: tensor of 32-bit float or 8-bit integer or 64-bit integer or 32-bit integer or TFLite uint8 type values
+
+### tfl.gather (TFL::GatherOp)
 Gather operator
 
-### Description:
+#### Description:
 
 Gather slices from `params` axis `axis` according to `indices`.
 
-### Operands:
-1. `params`: tensor of 32-bit float or 8-bit integer or 32-bit integer or 64-bit integer or TFLite string type values
+#### Operands:
+1. `params`: tensor of 32-bit float or 1-bit integer or 8-bit integer or 32-bit integer or 64-bit integer or TFLite string type or QI8 type or QUI8 type values
 1. `indices`: tensor of 32-bit integer or 64-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `axis` | `IntegerAttr` | 32-bit integer attribute attribute |
 
-### Results:
-1. `output`: tensor of 32-bit float or 16-bit integer or 32-bit integer or 64-bit integer or TFLite string type values
+#### Results:
+1. `output`: tensor of 32-bit float or 1-bit integer or 8-bit integer or 32-bit integer or 64-bit integer or TFLite string type or QI8 type or QUI8 type values
 
-## tfl.greater_equal (TFL::GreaterEqualOp)
+### tfl.greater_equal (TFL::GreaterEqualOp)
 Greater_equal operator
 
-### Description:
+#### Description:
 
 Element-wise greater_equal operation.
 
-### Operands:
+#### Operands:
 1. `lhs`: tensor of any type values
 1. `rhs`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `output`: tensor of 1-bit integer values
 
-## tfl.greater (TFL::GreaterOp)
+### tfl.greater (TFL::GreaterOp)
 Greater operator
 
-### Description:
+#### Description:
 
 Element-wise greater operation.
 
-### Operands:
+#### Operands:
 1. `lhs`: tensor of any type values
 1. `rhs`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.pseudo_input (TFL::InputOp)
+### tfl.hard_swish (TFL::HardSwishOp)
+Hardswish activation function.
+
+#### Description:
+
+Computes hard-swish activation function
+  f(x) -> (x * relu6(x+3))/6
+element-wise.
+
+#### Operands:
+1. `input`: tensor of 32-bit float or QUI8 type or QI8 type values
+
+#### Attributes:
+
+#### Results:
+1. `out`: tensor of 32-bit float or QUI8 type or QI8 type values
+
+### tfl.pseudo_input (TFL::InputOp)
 Input pseudo operator
 
-### Description:
+#### Description:
 
 Takes one of the function arguments as input and returns it as result.  This
 is a NOP and is used to attach attributes such as tensor name to function
 arguments.
 
-### Operands:
+#### Operands:
 1. `input`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.leaky_relu (TFL::LeakyReluOp)
+### tfl.l2_normalization (TFL::L2NormalizationOp)
+L2 Normalize Operator
+
+#### Description:
+
+L2Normalization Op
+
+#### Operands:
+1. `input`: tensor of 32-bit float or QUI8 type or QI8 type or QUI16 type or QI16 type or 8-bit integer values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `fused_activation_function` | `StringAttr` | fused activation enum attribute |
+
+#### Results:
+1. `output`: tensor of 32-bit float or QUI8 type or QI8 type or QUI16 type or QI16 type or 8-bit integer values
+
+### tfl.lstm (TFL::LSTMOp)
+The full lstm operator
+
+#### Description:
+
+Long short-term memory unit (LSTM) recurrent network layer.
+The default non-peephole implementation is based on:
+http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf
+S. Hochreiter and J. Schmidhuber. "Long Short-Term Memory". Neural Computation,
+9(8):1735-1780, 1997.
+The peephole implementation is based on:
+https://research.google.com/pubs/archive/43905.pdf
+Hasim Sak, Andrew Senior, and Francoise Beaufays. "Long short-term memory
+recurrent neural network architectures for large scale acoustic modeling."
+INTERSPEECH, 2014.
+The coupling of input and forget gate (CIFG) is based on:
+http://arxiv.org/pdf/1503.04069.pdf
+Greff et al. "LSTM: A Search Space Odyssey"
+The layer normalization is based on:
+https://arxiv.org/pdf/1607.06450.pdf
+Ba et al. “Layer Normalization”
+
+#### Operands:
+1. `input`: tensor of 32-bit float values
+1. `input_to_input_weights`: tensor of 32-bit float or 8-bit integer values or none type
+1. `input_to_forget_weights`: tensor of 32-bit float or 8-bit integer values
+1. `input_to_cell_weights`: tensor of 32-bit float or 8-bit integer values
+1. `input_to_output_weights`: tensor of 32-bit float or 8-bit integer values
+1. `recurrent_to_input_weights`: tensor of 32-bit float or 8-bit integer values or none type
+1. `recurrent_to_forget_weights`: tensor of 32-bit float or 8-bit integer values
+1. `recurrent_to_cell_weights`: tensor of 32-bit float or 8-bit integer values
+1. `recurrent_to_output_weights`: tensor of 32-bit float or 8-bit integer values
+1. `cell_to_input_weights`: tensor of 32-bit float or 8-bit integer values or none type
+1. `cell_to_forget_weights`: tensor of 32-bit float or 8-bit integer values or none type
+1. `cell_to_output_weights`: tensor of 32-bit float or 8-bit integer values or none type
+1. `input_gate_bias`: tensor of 32-bit float values or none type
+1. `forget_gate_bias`: tensor of 32-bit float values
+1. `cell_bias`: tensor of 32-bit float values
+1. `output_gate_bias`: tensor of 32-bit float values
+1. `projection_weights`: tensor of 32-bit float or 8-bit integer values or none type
+1. `projection_bias`: tensor of 32-bit float values or none type
+1. `input_activation_state`: stateful tensor
+1. `input_cell_state`: stateful tensor
+1. `input_layer_norm_coefficients`: tensor of 32-bit float values or none type
+1. `forget_layer_norm_coefficients`: tensor of 32-bit float values or none type
+1. `cell_layer_norm_coefficients`: tensor of 32-bit float values or none type
+1. `output_layer_norm_coefficients`: tensor of 32-bit float values or none type
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `fused_activation_function` | `StringAttr` | fused activation enum attribute |
+| `cell_clip` | `FloatAttr` | 32-bit float attribute attribute |
+| `proj_clip` | `FloatAttr` | 32-bit float attribute attribute |
+| `kernel_type` | `StringAttr` | lstm kernel type enum case FULL attribute |
+
+#### Results:
+1. `output`: tensor of any type values
+
+### tfl.leaky_relu (TFL::LeakyReluOp)
 Leaky Relu operator
 
-### Description:
+#### Description:
 
 Element-wise Leaky ReLU operator
   x -> x >= 0 ? x : (alpha * x)
 
-### Operands:
+#### Operands:
 1. `input`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `alpha` | `FloatAttr` | 32-bit float attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.less_equal (TFL::LessEqualOp)
+### tfl.less_equal (TFL::LessEqualOp)
 Less_equal operator
 
-### Description:
+#### Description:
 
 Element-wise less_equal operation.
 
-### Operands:
-1. `lhs`: tensor of 32-bit float or 32-bit integer or 64-bit integer or 8-bit integer values
-1. `rhs`: tensor of 32-bit float or 32-bit integer or 64-bit integer or 8-bit integer values
+#### Operands:
+1. `lhs`: tensor of 32-bit float or 32-bit integer or 64-bit integer or 8-bit integer or QI8 type or QUI8 type or TFLite uint8 type values
+1. `rhs`: tensor of 32-bit float or 32-bit integer or 64-bit integer or 8-bit integer or QI8 type or QUI8 type or TFLite uint8 type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `output`: tensor of 1-bit integer values
 
-## tfl.less (TFL::LessOp)
+### tfl.less (TFL::LessOp)
 Less operator
 
-### Description:
+#### Description:
 
 Element-wise less operation.
 
-### Operands:
+#### Operands:
 1. `lhs`: tensor of any type values
 1. `rhs`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `output`: tensor of 1-bit integer values
 
-## tfl.log (TFL::LogOp)
+### tfl.local_response_normalization (TFL::LocalResponseNormalizationOp)
+Local Response Normalization.
+
+#### Description:
+
+The 4-D `input` tensor is treated as a 3-D array of 1-D vectors (along the last
+dimension), and each vector is normalized independently.  Within a given vector,
+each component is divided by the weighted, squared sum of inputs within
+`depth_radius`.  In detail,
+
+    sqr_sum[a, b, c, d] =
+        sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2)
+    output = input / (bias + alpha * sqr_sum) ** beta
+
+For details, see [Krizhevsky et al., ImageNet classification with deep
+convolutional neural networks (NIPS 2012)](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
+
+#### Operands:
+1. `input`: tensor of 32-bit float or QI8 type or QUI8 type values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `radius` | `IntegerAttr` | 32-bit integer attribute attribute |
+| `bias` | `FloatAttr` | 32-bit float attribute attribute |
+| `alpha` | `FloatAttr` | 32-bit float attribute attribute |
+| `beta` | `FloatAttr` | 32-bit float attribute attribute |
+
+#### Results:
+1. `output`: tensor of 32-bit float or QI8 type or QUI8 type values
+
+### tfl.log (TFL::LogOp)
 Natural logarithm operator
 
-### Description:
+#### Description:
 
 Performs element-wise natural logarithm operation on input.
 
-### Operands:
+#### Operands:
 1. `x`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `y`: tensor of any type values
 
-## tfl.log_softmax (TFL::LogSoftmaxOp)
+### tfl.log_softmax (TFL::LogSoftmaxOp)
 Log softmax operator
 
-### Description:
+#### Description:
 
 Computes element-wise log softmax activations with the following formula
 
   input - log(reduce_sum(exp(input), dim))
 
-### Operands:
+#### Operands:
 1. `input`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.logical_and (TFL::LogicalAndOp)
+### tfl.logical_and (TFL::LogicalAndOp)
 Logical AND operator
 
-### Description:
+#### Description:
 
 Element-wise logical AND operation.
 
-### Operands:
+#### Operands:
 1. `lhs`: tensor of 1-bit integer values
 1. `rhs`: tensor of 1-bit integer values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `output`: tensor of 1-bit integer values
 
-## tfl.logical_not (TFL::LogicalNotOp)
+### tfl.logical_not (TFL::LogicalNotOp)
 Logical NOT operator
 
-### Description:
+#### Description:
 
 Element-wise logical NOT operation.
 
-### Operands:
+#### Operands:
 1. `lhs`: tensor of 1-bit integer values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `output`: tensor of 1-bit integer values
 
-## tfl.logical_or (TFL::LogicalOrOp)
+### tfl.logical_or (TFL::LogicalOrOp)
 Logical OR operator
 
-### Description:
+#### Description:
 
 Element-wise logical OR operation.
 
-### Operands:
+#### Operands:
 1. `lhs`: tensor of 1-bit integer values
 1. `rhs`: tensor of 1-bit integer values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `output`: tensor of 1-bit integer values
 
-## tfl.logistic (TFL::LogisticOp)
+### tfl.logistic (TFL::LogisticOp)
 Logistic operator
 
-### Description:
+#### Description:
 
 Computes element-wise Sigmoid of input
 
-### Operands:
-1. `x`: tensor of floating-point values
+#### Operands:
+1. `x`: tensor of floating-point or QI8 type or QUI8 type or QI16 type or QUI16 type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
-1. `y`: tensor of floating-point values
+#### Results:
+1. `y`: tensor of floating-point or QI8 type or QUI8 type or QI16 type or QUI16 type values
 
-## tfl.max_pool_2d (TFL::MaxPool2DOp)
+### tfl.matrix_diag (TFL::MatrixDiagOp)
+
+Returns a tensor with the provided diagonal and everything else padded with zeros.
+  
+
+#### Description:
+
+Given a diagonal, returns a tensor with the diagonal and everything else padded with zeros.
+Assume diagonal has k dimensions `[I, J, K, ..., N]`, then the output is a tensor of rank `k+1`
+with dimensions `[I, J, K, ..., N, N]` where:
+   `output[i, j, k, ..., m, n] = 1{m=n} * diagonal[i, j, k, ..., n].`
+
+#### Operands:
+1. `diagonal`: tensor of 32-bit float or 8-bit integer or 64-bit integer or 32-bit integer or TFLite uint8 type values
+
+#### Attributes:
+
+#### Results:
+1. `output`: tensor of 32-bit float or 8-bit integer or 64-bit integer or 32-bit integer or TFLite uint8 type values
+
+### tfl.max_pool_2d (TFL::MaxPool2DOp)
 Max Pool 2D op
 
-### Description:
+#### Description:
 
 Performs max pool 2D on input.
 
 Inputs:
   `inputs[0]`: required: the input tensor
 
-### Operands:
+#### Operands:
 1. `input`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `padding` | `StringAttr` | padding enum attribute |
@@ -691,29 +988,29 @@ Inputs:
 | `filter_height` | `IntegerAttr` | 32-bit integer attribute attribute |
 | `fused_activation_function` | `StringAttr` | fused activation enum attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.maximum (TFL::MaximumOp)
+### tfl.maximum (TFL::MaximumOp)
 Max operator
 
-### Description:
+#### Description:
 
 Element-wise max operation.
 
-### Operands:
-1. `lhs`: tensor of floating-point or 32/64-bit integer values
-1. `rhs`: tensor of floating-point or 32/64-bit integer values
+#### Operands:
+1. `lhs`: tensor of floating-point or 32/64-bit integer or QI8 type or QUI8 type values
+1. `rhs`: tensor of floating-point or 32/64-bit integer or QI8 type or QUI8 type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
-1. `max`: tensor of floating-point or 32/64-bit integer values
+#### Results:
+1. `max`: tensor of floating-point or 32/64-bit integer or QI8 type or QUI8 type values
 
-## tfl.mean (TFL::MeanOp)
+### tfl.mean (TFL::MeanOp)
 Mean operator
 
-### Description:
+#### Description:
 
 Computes the mean of elements across dimensions of a tensor.
 Reduces input_tensor along the dimensions given in axis.
@@ -721,88 +1018,251 @@ Unless keepdims is true, the rank of the tensor is reduced by 1 for
 each entry in axis. If keepdims is true, the reduced dimensions are retained
 with length 1.
 
-### Operands:
-1. `input`: tensor of 32-bit float or 8-bit integer or 32-bit integer or 64-bit integer values
+#### Operands:
+1. `input`: tensor of 32-bit float or 8-bit integer or 32-bit integer or 64-bit integer or QI8 type or QUI8 type or TFLite uint8 type values
 1. `axis`: tensor of 32-bit integer or 64-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `keep_dims` | `BoolAttr` | bool attribute attribute |
 
-### Results:
-1. `output`: tensor of 32-bit float or 32-bit integer or 64-bit integer or 8-bit integer values
+#### Results:
+1. `output`: tensor of 32-bit float or 32-bit integer or 64-bit integer or 8-bit integer or QI8 type or QUI8 type or TFLite uint8 type values
 
-## tfl.minimum (TFL::MinimumOp)
+### tfl.minimum (TFL::MinimumOp)
 Min operator
 
-### Description:
+#### Description:
 
 Element-wise min operation.
 
-### Operands:
-1. `lhs`: tensor of floating-point or 32/64-bit integer values
-1. `rhs`: tensor of floating-point or 32/64-bit integer values
+#### Operands:
+1. `lhs`: tensor of floating-point or 32/64-bit integer or QI8 type or QUI8 type values
+1. `rhs`: tensor of floating-point or 32/64-bit integer or QI8 type or QUI8 type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
-1. `min`: tensor of floating-point or 32/64-bit integer values
+#### Results:
+1. `min`: tensor of floating-point or 32/64-bit integer or QI8 type or QUI8 type values
 
-## tfl.mul (TFL::MulOp)
+### tfl.mirror_pad (TFL::MirrorPadOp)
+MirrorPad Operator. Pads a tensor with mirrored values.
+
+#### Description:
+
+This operation pads an input with mirrored values according to the paddings
+you specify. paddings is an integer tensor with shape [n, 2],
+where n is the rank of input.
+For each dimension D of input, paddings[D, 0] indicates how many values
+to add before the contents of input in that dimension,
+and paddings[D, 1] indicates how many values to add after the contents of
+input in that dimension.
+
+Both paddings[D, 0] and paddings[D, 1] must be no greater than
+input.dim_size(D) if copy_border is true, or no greater than
+input.dim_size(D) - 1 if copy_border is false.
+
+The padded size of each dimension D of the output is:
+
+paddings(D, 0) + input.dim_size(D) + paddings(D, 1)
+
+#### Operands:
+1. `input`: tensor of 32-bit float or 32-bit integer or 64-bit integer values
+1. `pad`: tensor of 32-bit integer or 64-bit integer values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `mode` | `StringAttr` | Mirror pad enum attribute |
+
+#### Results:
+1. `output`: tensor of 32-bit float or 32-bit integer or 64-bit integer values
+
+### tfl.mul (TFL::MulOp)
 Multiplication operator
 
-### Description:
+#### Description:
 
 Element-wise multiplication operation.
 
-### Operands:
+#### Operands:
 1. `lhs`: tensor of any type values
 1. `rhs`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `fused_activation_function` | `StringAttr` | fused activation enum attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.neg (TFL::NegOp)
+### tfl.neg (TFL::NegOp)
 Negation operator
 
-### Description:
+#### Description:
 
 Computes element-wise negation of input
 
-### Operands:
+#### Operands:
 1. `x`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `y`: tensor of any type values
 
-## tfl.not_equal (TFL::NotEqualOp)
+### tfl.non_max_suppression_v4 (TFL::NonMaxSuppressionV4Op)
+
+Greedily selects a subset of bounding boxes in descending order of score.
+  
+
+#### Description:
+
+Prunes away boxes that have high intersection-over-union (IOU) overlap
+with previously selected boxes.  Bounding boxes with score less than
+`score_threshold` are removed.  Bounding boxes are supplied as
+[y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
+diagonal pair of box corners and the coordinates can be provided as normalized
+(i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm
+is agnostic to where the origin is in the coordinate system and more
+generally is invariant to orthogonal transformations and translations
+of the coordinate system; thus translating or reflections of the coordinate
+system result in the same boxes being selected by the algorithm.
+The output of this operation is a set of integers indexing into the input
+collection of bounding boxes representing the selected boxes.  The bounding
+box coordinates corresponding to the selected indices can then be obtained
+using the `tf.gather` operation.  For example:
+  selected_indices = tf.image.non_max_suppression_v2(
+      boxes, scores, max_output_size, iou_threshold, score_threshold)
+  selected_boxes = tf.gather(boxes, selected_indices)
+
+#### Operands:
+1. `boxes`: tensor of floating-point values
+1. `scores`: tensor of floating-point values
+1. `max_output_size`: tensor of 32-bit integer values
+1. `iou_threshold`: tensor of floating-point values
+1. `score_threshold`: tensor of floating-point values
+
+#### Attributes:
+
+#### Results:
+1. `selected_indices`: tensor of 32-bit integer values
+1. `valid_outputs`: tensor of 32-bit integer values
+
+### tfl.non_max_suppression_v5 (TFL::NonMaxSuppressionV5Op)
+
+Greedily selects a subset of bounding boxes in descending order of score.
+  
+
+#### Description:
+
+Prunes away boxes that have high intersection-over-union (IOU) overlap
+with previously selected boxes.  Bounding boxes with score less than
+`score_threshold` are removed.  Bounding boxes are supplied as
+[y1, x1, y2, x2], where (y1, x1) and (y2, x2) are the coordinates of any
+diagonal pair of box corners and the coordinates can be provided as normalized
+(i.e., lying in the interval [0, 1]) or absolute.  Note that this algorithm
+is agnostic to where the origin is in the coordinate system and more
+generally is invariant to orthogonal transformations and translations
+of the coordinate system; thus translating or reflections of the coordinate
+system result in the same boxes being selected by the algorithm.
+The output of this operation is a set of integers indexing into the input
+collection of bounding boxes representing the selected boxes.  The bounding
+box coordinates corresponding to the selected indices can then be obtained
+using the `tf.gather` operation.  For example:
+  selected_indices = tf.image.non_max_suppression_v2(
+      boxes, scores, max_output_size, iou_threshold, score_threshold)
+  selected_boxes = tf.gather(boxes, selected_indices)
+This op also supports a Soft-NMS (with Gaussian weighting) mode (c.f.
+Bodla et al, https://arxiv.org/abs/1704.04503) where boxes reduce the score
+of other overlapping boxes instead of directly causing them to be pruned.
+To enable this Soft-NMS mode, set the `soft_nms_sigma` parameter to be
+larger than 0.
+
+#### Operands:
+1. `boxes`: tensor of floating-point values
+1. `scores`: tensor of floating-point values
+1. `max_output_size`: tensor of 32-bit integer values
+1. `iou_threshold`: tensor of floating-point values
+1. `score_threshold`: tensor of floating-point values
+1. `soft_nms_sigma`: tensor of floating-point values
+
+#### Attributes:
+
+#### Results:
+1. `selected_indices`: tensor of 32-bit integer values
+1. `selected_scores`: tensor of floating-point values
+1. `valid_outputs`: tensor of 32-bit integer values
+
+### tfl.not_equal (TFL::NotEqualOp)
 Not_equal operator
 
-### Description:
+#### Description:
 
 Element-wise not_equal operation.
 
-### Operands:
+#### Operands:
 1. `lhs`: tensor of any type values
 1. `rhs`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `output`: tensor of 1-bit integer values
 
-## tfl.pack (TFL::PackOp)
+### tfl.one_hot (TFL::OneHotOp)
+OneHot operator
+
+#### Description:
+
+Returns a one-hot tensor. The locations represented by indices in `indices`
+take value `on_value`, while all other locations take value `off_value`.
+
+If the input `indices` is rank `N`, the output will have rank `N+1`.
+The new axis is created at dimension `axis` (default: the new axis is
+appended at the end).
+
+#### Operands:
+1. `indices`: tensor of 32-bit integer or 64-bit integer values
+1. `depth`: tensor of 32-bit integer values
+1. `on_value`: tensor of 32-bit float or 32-bit integer or 64-bit integer or 1-bit integer values
+1. `off_value`: tensor of 32-bit float or 32-bit integer or 64-bit integer or 1-bit integer values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `axis` | `IntegerAttr` | 32-bit integer attribute attribute |
+
+#### Results:
+1. `output`: tensor of 32-bit float or 32-bit integer or 64-bit integer or 1-bit integer values
+
+### tfl.prelu (TFL::PReluOp)
+Parameterized Relu operator
+
+#### Description:
+
+Parameterized Relu operator
+  x -> x >= 0 ? x : (alpha * x)
+where alpha is a trainable tensor.
+alpha should have one less rank than the input as it doesn't have the batch
+dimension, and the other dimensions either should be the same size as input
+or size 1, where it is broadcast in the second case.
+
+#### Operands:
+1. `input`: tensor of 32-bit float or QUI8 type values
+1. `alpha`: tensor of 32-bit float or QUI8 type values
+
+#### Attributes:
+
+#### Results:
+1. `output`: tensor of 32-bit float or QUI8 type values
+
+### tfl.pack (TFL::PackOp)
 Packs a list of tensors along a dimension into one tensor
 
-### Description:
+#### Description:
 
 Packs a list of `values_count` rank-`R` tensors into one rank-`(R+1)`
 tensor.
@@ -829,22 +1289,22 @@ pack([x, y, z], axis=1) => [[1, 2, 3], [4, 5, 6]]
 
 This is the opposite of `unpack`.
 
-### Operands:
-1. `values`: tensor of 32-bit float or 8-bit integer or 16-bit integer or 32-bit integer or 64-bit integer values
+#### Operands:
+1. `values`: tensor of 32-bit float or 8-bit integer or 16-bit integer or 32-bit integer or 64-bit integer or QI8 type or QUI8 type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `values_count` | `IntegerAttr` | 32-bit integer attribute attribute |
 | `axis` | `IntegerAttr` | 32-bit integer attribute attribute |
 
-### Results:
-1. `output`: tensor of 32-bit float or 8-bit integer or 16-bit integer or 32-bit integer or 64-bit integer values
+#### Results:
+1. `output`: tensor of 32-bit float or 8-bit integer or 16-bit integer or 32-bit integer or 64-bit integer or QI8 type or QUI8 type values
 
-## tfl.pad (TFL::PadOp)
+### tfl.pad (TFL::PadOp)
 Padding operator
 
-### Description:
+#### Description:
 
 This operation pads a `input` with zeros according to the `paddings` you
 specify. `paddings` is an integer tensor with shape `[Dn, 2]`, where n is
@@ -869,19 +1329,19 @@ pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0]
                       [0, 0, 0, 0, 0, 0]]
 ```
 
-### Operands:
-1. `input`: tensor of 32-bit float or 8-bit integer or 32-bit integer or 64-bit integer values
+#### Operands:
+1. `input`: tensor of 32-bit float or 8-bit integer or 32-bit integer or 64-bit integer or QI8 type or QUI8 type values
 1. `padding`: tensor of 32/64-bit integer values
 
-### Attributes:
+#### Attributes:
 
-### Results:
-1. `output`: tensor of 32-bit float or 8-bit integer or 32-bit integer or 64-bit integer values
+#### Results:
+1. `output`: tensor of 32-bit float or 8-bit integer or 32-bit integer or 64-bit integer or QI8 type or QUI8 type values
 
-## tfl.padv2 (TFL::PadV2Op)
+### tfl.padv2 (TFL::PadV2Op)
 Padding operator v2
 
-### Description:
+#### Description:
 
 This operation pads a `input` according to the `paddings` and
 `constant_values` you specify. `paddings` is an integer tensor with shape
@@ -908,216 +1368,305 @@ pad(t, paddings) ==> [[0, 0, 0, 0, 0, 0]
                       [0, 0, 0, 0, 0, 0]]
 ```
 
-### Operands:
-1. `input`: tensor of 32-bit float or 8-bit integer or 32-bit integer or 64-bit integer values
+#### Operands:
+1. `input`: tensor of 32-bit float or 8-bit integer or 32-bit integer or 64-bit integer or QI8 type or QUI8 type values
 1. `padding`: tensor of 32/64-bit integer values
 1. `constant_values`: tensor of 32-bit float or 8-bit integer or 32-bit integer or 64-bit integer values
 
-### Attributes:
+#### Attributes:
 
-### Results:
-1. `output`: tensor of 32-bit float or 8-bit integer or 32-bit integer or 64-bit integer values
+#### Results:
+1. `output`: tensor of 32-bit float or 8-bit integer or 32-bit integer or 64-bit integer or QI8 type or QUI8 type values
 
-## tfl.pow (TFL::PowOp)
+### tfl.pow (TFL::PowOp)
 Power operator
 
-### Description:
+#### Description:
 
 Element-wise power operation.
 
-### Operands:
+#### Operands:
 1. `lhs`: tensor of any type values
 1. `rhs`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.pseudo_qconst (TFL::QConstOp)
+### tfl.pseudo_qconst (TFL::QConstOp)
 Quantized constant pseudo op
 
-### Description:
+#### Description:
 
 Represents a quantized constant value in TensorFlow Lite dialect. This is
 not an actual operation and it will be lowered to buffer instead. The
 quantization parameters are stored as a type attribute in this constant.
 
-### Operands:
+#### Operands:
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `qtype` | `TypeAttr` | Tensor type attribute attribute |
 | `value` | `ElementsAttr` | constant vector/tensor attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.quantize (TFL::QuantizeOp)
+### tfl.quantize (TFL::QuantizeOp)
 Quantize operator
 
-### Description:
+#### Description:
 
 Converts floating point tensors to quantized integer tensors according to
 the quantization parameters defined in the type attribute.
 
-### Operands:
+#### Operands:
 1. `input`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `qtype` | `TypeAttr` | Tensor type attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.range (TFL::RangeOp)
+### tfl.range (TFL::RangeOp)
 Range operator
 
-### Description:
+#### Description:
 
 Returns a 1D tensor defined by a sequence from `start` to `limit` with
 a given `delta`.
 
-### Operands:
+#### Operands:
 1. `start`: tensor of any type values
 1. `limit`: tensor of any type values
 1. `delta`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `result`: tensor of any type values
 
-## tfl.rank (TFL::RankOp)
+### tfl.rank (TFL::RankOp)
 Rank operator.
 
-### Description:
+#### Description:
 
 Returns the rank of a tensor.
 
-### Operands:
+#### Operands:
 1. `input`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `output`: tensor of any integer type
 
-## tfl.reduce_max (TFL::ReduceMaxOp)
+### tfl.reduce_any (TFL::ReduceAnyOp)
+
+Computes the "logical or" of elements across dimensions of a tensor.
+  
+
+#### Description:
+
+Reduces `input` along the dimensions given in `axis`. Unless
+`keep_dims` is true, the rank of the tensor is reduced by 1 for each entry in
+`axis`. If `keep_dims` is true, the reduced dimensions are
+retained with length 1.
+
+#### Operands:
+1. `input`: tensor of 1-bit integer values
+1. `reduction_indices`: tensor of 32-bit integer values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `keep_dims` | `BoolAttr` | bool attribute attribute |
+
+#### Results:
+1. `output`: tensor of 1-bit integer values
+
+### tfl.reduce_max (TFL::ReduceMaxOp)
 Max-reduction operator
 
-### Description:
+#### Description:
 
 Computes the max reduction along the specified axes
 
-### Operands:
+#### Operands:
 1. `input`: tensor of any type values
-1. `axes`: tensor of 32/64-bit integer values
+1. `axes`: tensor of 32-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `keep_dims` | `BoolAttr` | bool attribute attribute |
 
-### Results:
+#### Results:
 1. &laquo;unnamed&raquo;: tensor of any type values
 
-## tfl.reduce_min (TFL::ReduceMinOp)
+### tfl.reduce_min (TFL::ReduceMinOp)
 Min-reduction operator
 
-### Description:
+#### Description:
 
 Computes the min reduction along the specified axes
 
-### Operands:
+#### Operands:
 1. `input`: tensor of any type values
-1. `axes`: tensor of 32/64-bit integer values
+1. `axes`: tensor of 32-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `keep_dims` | `BoolAttr` | bool attribute attribute |
 
-### Results:
+#### Results:
 1. &laquo;unnamed&raquo;: tensor of any type values
 
-## tfl.relu6 (TFL::Relu6Op)
+### tfl.reduce_prod (TFL::ReduceProdOp)
+Prod-reduction operator
+
+#### Description:
+
+Computes the product along the specified axes
+
+#### Operands:
+1. `input`: tensor of 32-bit float or 8-bit integer or 32-bit integer or 64-bit integer values
+1. `axes`: tensor of 32-bit integer values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `keep_dims` | `BoolAttr` | bool attribute attribute |
+
+#### Results:
+1. &laquo;unnamed&raquo;: tensor of any type values
+
+### tfl.relu6 (TFL::Relu6Op)
 Relu6 operator
 
-### Description:
+#### Description:
 
 Element-wise Relu6 operator
   x -> max(0, min(6, x))
 
-### Operands:
+#### Operands:
 1. `x`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `y`: tensor of any type values
 
-## tfl.relu (TFL::ReluOp)
+### tfl.relu (TFL::ReluOp)
 Relu operator
 
-### Description:
+#### Description:
 
 Element-wise Relu operator
   x -> max(0, x)
 
-### Operands:
+#### Operands:
 1. `x`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `y`: tensor of any type values
 
-## tfl.reshape (TFL::ReshapeOp)
+### tfl.reshape (TFL::ReshapeOp)
 Reshape operator
 
-### Description:
+#### Description:
 
 Produces a tensor with the same values but different static shape defined
 by the output type.
 
-### Operands:
+#### Operands:
 1. `input`: tensor of any type values
+1. `shape`: tensor of 32-bit integer values
 
-### Attributes:
-| Attribute | MLIR Type | Description |
-| :-------: | :-------: | ----------- |
-| `new_shape` | `Attribute` | derived attribute attribute |
+#### Attributes:
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.resize_bilinear (TFL::ResizeBilinearOp)
+### tfl.resize_bilinear (TFL::ResizeBilinearOp)
 ResizeBilinear Op
 
-### Description:
+#### Description:
 
 Resize `images` to `size` using bilinear interpolation.
 
-### Operands:
-1. `input`: tensor of 32-bit float or 32-bit integer values
+#### Operands:
+1. `input`: tensor of 32-bit float or 32-bit integer or QI8 type or QUI8 type values
 1. `size`: tensor of 32-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `align_corners` | `BoolAttr` | bool attribute attribute |
 
-### Results:
-1. `output`: tensor of 32-bit float values
+#### Results:
+1. `output`: tensor of 32-bit float or QI8 type or QUI8 type values
 
-## tfl.reverse_v2 (TFL::ReverseV2Op)
+### tfl.resize_nearest_neighbor (TFL::ResizeNearestNeighborOp)
+ResizeNearestNeighbor Op
+
+#### Description:
+
+Resize `images` to `size` using nearest neighbor interpolation.
+
+#### Operands:
+1. `input`: tensor of 32-bit float or 8-bit integer or TFLite uint8 type or QUI8 type or QI8 type values
+1. `size`: tensor of 32-bit integer values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `align_corners` | `BoolAttr` | bool attribute attribute |
+
+#### Results:
+1. `output`: tensor of 32-bit float or 8-bit integer or TFLite uint8 type or QUI8 type or QI8 type values
+
+### tfl.reverse_sequence (TFL::ReverseSequenceOp)
+Reverses variable length slices.
+
+#### Description:
+
+This op first slices `input` along the dimension `batch_dim`, and for each
+slice `i`, reverses the first `seq_lengths[i]` elements along
+the dimension `seq_dim`.
+
+The elements of `seq_lengths` must obey `seq_lengths[i] <= input.dims[seq_dim]`,
+and `seq_lengths` must be a vector of length `input.dims[batch_dim]`.
+
+The output slice `i` along dimension `batch_dim` is then given by input
+slice `i`, with the first `seq_lengths[i]` slices along dimension
+`seq_dim` reversed.
+
+#### Operands:
+1. `input`: tensor of 32-bit float or 16-bit integer or 32-bit integer or 64-bit integer or TFLite uint8 type values
+1. `seq_lengths`: tensor of 32/64-bit integer values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `seq_dim` | `IntegerAttr` | 32-bit integer attribute attribute |
+| `batch_dim` | `IntegerAttr` | 32-bit integer attribute attribute |
+
+#### Results:
+1. `output`: tensor of 32-bit float or 16-bit integer or 32-bit integer or 64-bit integer or TFLite uint8 type values
+
+### tfl.reverse_v2 (TFL::ReverseV2Op)
 ReverseV2 Operator
 
-### Description:
+#### Description:
 
 Reverses specific dimensions of a tensor.
 
@@ -1134,34 +1683,75 @@ Args:
   with only 1 element which is the axis index.
   TODO: Add support for multiple elements.
 
-### Operands:
+#### Operands:
 1. `input`: tensor of 32-bit float or 16-bit integer or 32-bit integer or 64-bit integer values
 1. `axis`: tensor of 32-bit integer or 64-bit integer values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `output`: tensor of 32-bit float or 16-bit integer or 32-bit integer or 64-bit integer or 8-bit integer values
 
-## tfl.rsqrt (TFL::RsqrtOp)
+### tfl.round (TFL::RoundOp)
+Round operator
+
+#### Description:
+
+Rounds the values of a tensor to the nearest integer, element-wise.
+
+#### Operands:
+1. `x`: tensor of 32-bit float values
+
+#### Attributes:
+
+#### Results:
+1. `y`: tensor of 32-bit float values
+
+### tfl.rsqrt (TFL::RsqrtOp)
 Reciprocal of square root operator
 
-### Description:
+#### Description:
 
 Computes element-wise reverse square root of input
 
-### Operands:
+#### Operands:
 1. `x`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `y`: tensor of any type values
 
-## tfl.select (TFL::SelectOp)
+### tfl.svdf (TFL::SVDFOp)
+Singular value decomposition filter operator
+
+#### Description:
+
+The SVDF op is a decomposition of a densely connected op into low rank
+filters.
+For details: https://research.google.com/pubs/pub43813.html
+             https://arxiv.org/abs/1812.02802
+
+#### Operands:
+1. `input`: tensor of 32-bit float or 8-bit integer values
+1. `feature_weights`: tensor of 32-bit float or 8-bit integer values
+1. `time_weights`: tensor of 32-bit float or 8-bit integer values
+1. `input_gate_bias`: tensor of 32-bit float values or none type
+1. `activation_state`: stateful tensor
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `rank` | `IntegerAttr` | 32-bit integer attribute attribute |
+| `fused_activation_function` | `StringAttr` | fused activation enum attribute |
+
+#### Results:
+1. `output`: tensor of 32-bit float or 8-bit integer values
+
+### tfl.select (TFL::SelectOp)
 Select operator
 
-### Description:
+#### Description:
 
 Select values of 'x' if the corresponding value of 'condition' is true or
 the value of 'y' if false. There are valid condition input sizes:
@@ -1169,179 +1759,263 @@ the value of 'y' if false. There are valid condition input sizes:
 1. Either the same shape (in which case the select is elementwise), or
 2. condition must be Rank 1 and match over the first dimension.
 
-### Operands:
+#### Operands:
 1. `condition`: tensor of 1-bit integer values
-1. `x`: tensor of 32-bit float or 1-bit integer or 8-bit integer or 16-bit integer or 32-bit integer or 64-bit integer values
-1. `y`: tensor of 32-bit float or 1-bit integer or 8-bit integer or 16-bit integer or 32-bit integer or 64-bit integer values
+1. `x`: tensor of 32-bit float or 1-bit integer or 8-bit integer or 16-bit integer or 32-bit integer or 64-bit integer or TFLite uint8 type values
+1. `y`: tensor of 32-bit float or 1-bit integer or 8-bit integer or 16-bit integer or 32-bit integer or 64-bit integer or TFLite uint8 type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.shape (TFL::ShapeOp)
+### tfl.shape (TFL::ShapeOp)
 Shape operator
 
-### Description:
+#### Description:
 
 Returns the shape of a tensor.
 
-### Operands:
+#### Operands:
 1. `input`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `out_type` | `Attribute` | derived attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.sin (TFL::SinOp)
+### tfl.sin (TFL::SinOp)
 Sine operator
 
-### Description:
+#### Description:
 
 Computes element-wise Sine of input
 
-### Operands:
+#### Operands:
 1. `x`: tensor of floating-point values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `y`: tensor of floating-point values
 
-## tfl.softmax (TFL::SoftmaxOp)
+### tfl.slice (TFL::SliceOp)
+Return a slice from 'input'.
+
+#### Description:
+
+The output tensor is a tensor with dimensions described by 'size'
+whose values are extracted from 'input' starting at the offsets in
+'begin'.
+
+`begin` is zero-based; `size` is one-based. If size[i] is -1, all remaining
+elements in dimension i are included in the slice. In other words, this is
+equivalent to setting:
+  size[i] = input.dim_size(i) - begin[i]
+
+*Requirements*:
+  0 <= begin[i] <= begin[i] + size[i] <= Di  for i in [0, n)
+
+#### Operands:
+1. `input`: tensor of any type values
+1. `begin`: tensor of 32/64-bit integer values
+1. `size`: tensor of 32/64-bit integer values
+
+#### Attributes:
+
+#### Results:
+1. `output`: tensor of any type values
+
+### tfl.softmax (TFL::SoftmaxOp)
 Softmax operator
 
-### Description:
+#### Description:
 
 Computes element-wise softmax activations with the following formula
 
   exp(input) / tf.reduce_sum(exp(input * beta), dim)
 
-### Operands:
+#### Operands:
 1. `input`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `beta` | `FloatAttr` | 32-bit float attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.space_to_batch_nd (TFL::SpaceToBatchNdOp)
+### tfl.space_to_batch_nd (TFL::SpaceToBatchNdOp)
 SpaceToBatchNd operator
 
-### Description:
+#### Description:
 
 This operation reshapes space dimensions into the "batch" dimension 0
 
-### Operands:
-1. `input`: tensor of 32-bit float or 8-bit integer or 32-bit integer or 64-bit integer values
+#### Operands:
+1. `input`: tensor of 32-bit float or 8-bit integer or 32-bit integer or 64-bit integer or QI8 type or QUI8 type values
 1. `block_shape`: tensor of 32-bit integer values
 1. `paddings`: tensor of 32-bit integer values
 
-### Attributes:
+#### Attributes:
 
-### Results:
-1. `output`: tensor of 32-bit float or 16-bit integer or 32-bit integer or 64-bit integer values
+#### Results:
+1. `output`: tensor of 32-bit float or 16-bit integer or 32-bit integer or 64-bit integer or QI8 type or QUI8 type values
 
-## tfl.split (TFL::SplitOp)
+### tfl.space_to_depth (TFL::SpaceToDepthOp)
+SpaceToDepth operator
+
+#### Description:
+
+Rearranges blocks of spatial data into depth. More specifically,
+this op outputs a copy of the input tensor where values from the `height`
+and `width` dimensions are moved to the `depth` dimension.
+`block_size` indicates the input block size.
+
+#### Operands:
+1. `input`: tensor of 32-bit float or 8-bit integer or 32-bit integer or 64-bit integer or TFLite uint8 type or QUI8 type values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `block_size` | `IntegerAttr` | 32-bit integer attribute attribute |
+
+#### Results:
+1. `output`: tensor of 32-bit float or 8-bit integer or 32-bit integer or 64-bit integer or TFLite uint8 type or QUI8 type values
+
+### tfl.sparse_to_dense (TFL::SparseToDenseOp)
+Converts a sparse representation into a dense tensor.
+
+#### Description:
+
+Builds an array `dense` with shape `output_shape` such that
+
+```
+# If sparse_indices is scalar
+dense[i] = (i == sparse_indices ? sparse_values : default_value)
+
+# If sparse_indices is a vector, then for each i
+dense[sparse_indices[i]] = sparse_values[i]
+
+# If sparse_indices is an n by d matrix, then for each i in [0, n)
+dense[sparse_indices[i][0], ..., sparse_indices[i][d-1]] = sparse_values[i]
+```
+
+All other values in `dense` are set to `default_value`.  If `sparse_values` is a
+scalar, all sparse indices are set to this single value.
+
+Indices should be sorted in lexicographic order, and indices must not
+contain any repeats. If `validate_indices` is true, these properties
+are checked during execution.
+
+#### Operands:
+1. `sparse_indices`: tensor of 32/64-bit integer values
+1. `output_shape`: tensor of 32/64-bit integer values
+1. `sparse_values`: tensor of 32-bit integer or 64-bit integer or 8-bit integer or TFLite uint8 type or 32-bit float values
+1. `default_value`: tensor of 32-bit integer or 64-bit integer or 8-bit integer or TFLite uint8 type or 32-bit float values
+
+#### Attributes:
+
+#### Results:
+1. `dense`: tensor of 32-bit integer or 64-bit integer or 8-bit integer or TFLite uint8 type or 32-bit float values
+
+### tfl.split (TFL::SplitOp)
 Splits a tensor into `num_split` tensors along one dimension.
 
-### Description:
+#### Description:
 
 Splits the `value` tensor along `split_dim` into a number of sub-tensors
 with same shape as the original one, except for `split_dim`. Same as
 tf.Split.
 
-### Operands:
-1. `split_dim`: tensor of 32-bit integer values
-1. `value`: tensor of 32-bit float or 16-bit integer or 32-bit integer or 64-bit integer values
+#### Operands:
+1. `split_dim`: 0D tensor of 32-bit integer values
+1. `value`: tensor of 32-bit float or 16-bit integer or 32-bit integer or 64-bit integer or QI8 type or QUI8 type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
-| `num_splits` | `IntegerAttr` | 32-bit integer attribute attribute |
+| `num_splits` | `IntegerAttr` | positive 32-bit integer attribute attribute |
 
-### Results:
-1. `outputs`: tensor of 32-bit float or 16-bit integer or 32-bit integer or 64-bit integer values
+#### Results:
+1. `outputs`: tensor of 32-bit float or 16-bit integer or 32-bit integer or 64-bit integer or QI8 type or QUI8 type values
 
-## tfl.split_v (TFL::SplitVOp)
+### tfl.split_v (TFL::SplitVOp)
 Splits a tensor into `num_split` tensors along one dimension.
 
-### Description:
+#### Description:
 
 Splits the `value` tensor along `split_dim` into a number of sub-tensors
 with same shape as the original one, except for `split_dim`. The grouping
 of the resultant sub-tensors is decided by `size-splits`. Same as tf.SplitV.
 
-### Operands:
-1. `value`: tensor of 32-bit float or 16-bit integer or 32-bit integer or 64-bit integer values
-1. `size_splits`: tensor of 32-bit integer values
-1. `split_dim`: tensor of 32-bit integer values
+#### Operands:
+1. `value`: tensor of 32-bit float or 16-bit integer or 32-bit integer or 64-bit integer or QI8 type or QUI8 type values
+1. `size_splits`: 1D tensor of 32-bit integer values
+1. `split_dim`: 0D tensor of 32-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
-| `num_splits` | `IntegerAttr` | 32-bit integer attribute attribute |
+| `num_splits` | `IntegerAttr` | positive 32-bit integer attribute attribute |
 
-### Results:
-1. `outputs`: tensor of 32-bit float or 16-bit integer or 32-bit integer or 64-bit integer values
+#### Results:
+1. `outputs`: tensor of 32-bit float or 16-bit integer or 32-bit integer or 64-bit integer or QI8 type or QUI8 type values
 
-## tfl.sqrt (TFL::SqrtOp)
+### tfl.sqrt (TFL::SqrtOp)
 Square root operator
 
-### Description:
+#### Description:
 
 Computes element-wise Square root of input
 
-### Operands:
-1. `x`: tensor of any type values
+#### Operands:
+1. `x`: tensor of floating-point values
 
-### Attributes:
+#### Attributes:
 
-### Results:
-1. `y`: tensor of any type values
+#### Results:
+1. `y`: tensor of floating-point values
 
-## tfl.square (TFL::SquareOp)
+### tfl.square (TFL::SquareOp)
 Square operator
 
-### Description:
+#### Description:
 
 Computes element-wise Square of input
 
-### Operands:
-1. `x`: tensor of any type values
+#### Operands:
+1. `x`: tensor of floating-point or QI8 type or QUI8 type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
-1. `y`: tensor of any type values
+#### Results:
+1. `y`: tensor of floating-point or QI8 type or QUI8 type values
 
-## tfl.squared_difference (TFL::SquaredDifferenceOp)
+### tfl.squared_difference (TFL::SquaredDifferenceOp)
 Squared difference operator
 
-### Description:
+#### Description:
 
 Element-wise squared difference operation.
 
-### Operands:
+#### Operands:
 1. `lhs`: tensor of any type values
 1. `rhs`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.squeeze (TFL::SqueezeOp)
+### tfl.squeeze (TFL::SqueezeOp)
 Removes dimensions of size 1 from the shape of a tensor.
 
-### Description:
+#### Description:
 
 Given a tensor `input`, this operation returns a tensor of the same type with
 all dimensions of size 1 removed. If you don't want to remove all size 1
@@ -1362,31 +2036,31 @@ Or, to remove specific size 1 dimensions:
 shape(squeeze(t, [2, 4])) ==> [1, 2, 3, 1]
 ```
 
-### Operands:
+#### Operands:
 1. `input`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `squeeze_dims` | `ArrayAttr` | 64-bit integer array attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.strided_slice (TFL::StridedSliceOp)
+### tfl.strided_slice (TFL::StridedSliceOp)
 StridedSlice Op
 
-### Description:
+#### Description:
 
 Return a strided slice from `input`.
 
-### Operands:
-1. `input`: tensor of 32-bit float or 32-bit integer or 64-bit integer or 8-bit integer values
+#### Operands:
+1. `input`: tensor of 32-bit float or 32-bit integer or 64-bit integer or 8-bit integer or QI8 type or QUI8 type or 1-bit integer values
 1. `begin`: tensor of 32-bit integer values
 1. `end`: tensor of 32-bit integer values
 1. `strides`: tensor of 32-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `begin_mask` | `IntegerAttr` | 32-bit integer attribute attribute |
@@ -1395,66 +2069,66 @@ Return a strided slice from `input`.
 | `new_axis_mask` | `IntegerAttr` | 32-bit integer attribute attribute |
 | `shrink_axis_mask` | `IntegerAttr` | 32-bit integer attribute attribute |
 
-### Results:
-1. `output`: tensor of 32-bit float or 32-bit integer or 64-bit integer or 8-bit integer values
+#### Results:
+1. `output`: tensor of 32-bit float or 32-bit integer or 64-bit integer or 8-bit integer or QI8 type or QUI8 type or 1-bit integer values
 
-## tfl.sub (TFL::SubOp)
+### tfl.sub (TFL::SubOp)
 Subtraction operator
 
-### Description:
+#### Description:
 
 Element-wise subtraction operation.
 
-### Operands:
+#### Operands:
 1. `lhs`: tensor of any type values
 1. `rhs`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `fused_activation_function` | `StringAttr` | fused activation enum attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.sum (TFL::SumOp)
+### tfl.sum (TFL::SumOp)
 Sum operator
 
-### Description:
+#### Description:
 
 Computes the sum reduction along the specified axes
 
-### Operands:
+#### Operands:
 1. `input`: tensor of any type values
-1. `axes`: tensor of 32/64-bit integer values
+1. `axes`: tensor of 32-bit integer values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `keep_dims` | `BoolAttr` | bool attribute attribute |
 
-### Results:
+#### Results:
 1. &laquo;unnamed&raquo;: tensor of any type values
 
-## tfl.tanh (TFL::TanhOp)
+### tfl.tanh (TFL::TanhOp)
 Hyperbolic tangent operator
 
-### Description:
+#### Description:
 
 Computes element-wise Hyperbolic tangent of input
 
-### Operands:
-1. `x`: tensor of any type values
+#### Operands:
+1. `x`: tensor of 32-bit float or 16-bit integer or 8-bit integer or QI8 type or QUI8 type or QI16 type or QUI16 type or TFLite uint8 type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
-1. `y`: tensor of any type values
+#### Results:
+1. `y`: tensor of 32-bit float or 16-bit integer or 8-bit integer or QI8 type or QUI8 type or QI16 type or QUI16 type or TFLite uint8 type values
 
-## tfl.tile (TFL::TileOp)
+### tfl.tile (TFL::TileOp)
 Tile operator.
 
-### Description:
+#### Description:
 
  Constructs a tensor by tiling a given tensor.
 
@@ -1464,54 +2138,76 @@ input.dims(i) * multiples[i] elements, and the values of input
 are replicated multiples[i] times along the 'i'th dimension.
 For example, tiling [a b c d] by [2] produces [a b c d a b c d].
 
-### Operands:
-1. `input`: tensor of any type values
+#### Operands:
+1. `input`: tensor of 32-bit float or 1-bit integer or 32-bit integer or 64-bit integer or TFLite uint8 type values
 1. `multiples`: tensor of 32/64-bit integer values
 
-### Attributes:
+#### Attributes:
 
-### Results:
-1. `output`: tensor of any type values
+#### Results:
+1. `output`: tensor of 32-bit float or 1-bit integer or 32-bit integer or 64-bit integer or TFLite uint8 type values
 
-## tfl.topk_v2 (TFL::TopKV2Op)
+### tfl.topk_v2 (TFL::TopKV2Op)
 TopK operator
 
-### Description:
+#### Description:
 
 Returns the top `k` largest element along each last dimensional slice of
 `input` and the indices of values within the last dimension of the input
 tensor.
 
-### Operands:
-1. `input`: tensor of 32-bit float or 8-bit integer or 32-bit integer or 64-bit integer values
+#### Operands:
+1. `input`: tensor of 32-bit float or 8-bit integer or 32-bit integer or 64-bit integer or TFLite uint8 type values
 1. `k`: tensor of 32-bit integer values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `values`: tensor of any type values
 1. `indices`: tensor of 32-bit integer values
 
-## tfl.transpose (TFL::TransposeOp)
+### tfl.transpose_conv (TFL::TransposeConvOp)
+Transpose convolution operator
+
+#### Description:
+
+Performs transpose convolution operation on input.
+
+#### Operands:
+1. `output_shape`: 1D tensor of 32-bit integer values
+1. `weights`: tensor of 32-bit float or TFLite uint8 type or QI8 type or QUI8 type values
+1. `input`: tensor of 32-bit float or TFLite uint8 type or QI8 type or QUI8 type values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `padding` | `StringAttr` | padding enum attribute |
+| `stride_h` | `IntegerAttr` | 32-bit integer attribute attribute |
+| `stride_w` | `IntegerAttr` | 32-bit integer attribute attribute |
+
+#### Results:
+1. `output`: tensor of any type values
+
+### tfl.transpose (TFL::TransposeOp)
 Transpose operator
 
-### Description:
+#### Description:
 
 Returns the Transpose of x
 
-### Operands:
+#### Operands:
 1. `x`: tensor of any type values
-1. `perm`: tensor of any type values
+1. `perm`: tensor of 32-bit integer values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `y`: tensor of any type values
 
-## tfl.unidirectional_sequence_lstm (TFL::UnidirectionalSequenceLSTMOp)
+### tfl.unidirectional_sequence_lstm (TFL::UnidirectionalSequenceLSTMOp)
 Unidirectional sequence lstm operator
 
-### Description:
+#### Description:
 
 A recurrent neural network specified by an LSTM cell. This Op supports
 unrolling the input along the time or batch dimensions, and
@@ -1522,7 +2218,7 @@ each element in the sequence s = 1...sequence_length:
 where LSTMOp is LSTM TF Lite Op and the “activation” is the function passed
 as the “fused_activation_function” argument (if not “NONE”).
 
-### Operands:
+#### Operands:
 1. `input`: tensor of 32-bit float or 8-bit integer values
 1. `input_to_input_weights`: tensor of 32-bit float or 8-bit integer values or none type
 1. `input_to_forget_weights`: tensor of 32-bit float or 8-bit integer values
@@ -1548,7 +2244,7 @@ as the “fused_activation_function” argument (if not “NONE”).
 1. `cell_layer_norm_coefficients`: tensor of 32-bit float or 8-bit integer values or none type
 1. `output_layer_norm_coefficients`: tensor of 32-bit float or 8-bit integer values or none type
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `fused_activation_function` | `StringAttr` | fused activation enum attribute |
@@ -1556,13 +2252,67 @@ as the “fused_activation_function” argument (if not “NONE”).
 | `proj_clip` | `FloatAttr` | 32-bit float attribute attribute |
 | `time_major` | `BoolAttr` | bool attribute attribute |
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
-## tfl.unpack (TFL::UnpackOp)
+### tfl.unidirectional_sequence_rnn (TFL::UnidirectionalSequenceRNNOp)
+Unidirectional sequence rnn operator
+
+#### Description:
+
+A recurrent neural network specified by an RNN cell. This Op takes in input
+in a format {batch_size, seq_len, input_size} or
+{seq_len, batch_size, input_size} if it's time-major.
+
+It implements the following operation for
+each element in the sequence s = 1...sequence_length:
+  outputs[s] = state = activation(RNNOp(inputs[s]))
+
+where RNNOp is the RNN TF Lite Op and the “activation” is the function passed
+as the “fused_activation_function” argument (if not “NONE”).
+
+#### Operands:
+1. `input`: tensor of 32-bit float or 8-bit integer values
+1. `input_to_input_weights`: tensor of 32-bit float or 8-bit integer values
+1. `recurrent_to_input_weights`: tensor of 32-bit float or 8-bit integer values
+1. `input_gate_bias`: tensor of 32-bit float values
+1. `hidden_state`: stateful tensor
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `time_major` | `BoolAttr` | bool attribute attribute |
+| `fused_activation_function` | `StringAttr` | fused activation enum attribute |
+
+#### Results:
+1. `output`: tensor of 32-bit float or 8-bit integer values
+
+### tfl.unique (TFL::UniqueOp)
+Unique Op.
+
+#### Description:
+
+This operation returns a tensor `y` containing all of the unique elements of `x`
+sorted in the same order that they occur in `x`. This operation also returns a
+tensor `idx` the same size as `x` that contains the index of each value of `x`
+in the unique output `y`. In other words: `y[idx[i]] = x[i]` for each index `i` of `x`.
+
+#### Operands:
+1. `input`: tensor of 8-bit integer or 16-bit integer or 32-bit integer or 64-bit integer or 32-bit float values
+
+#### Attributes:
+| Attribute | MLIR Type | Description |
+| :-------: | :-------: | ----------- |
+| `idx_out_type` | `Attribute` | derived attribute attribute |
+
+#### Results:
+1. `output`: tensor of 8-bit integer or 16-bit integer or 32-bit integer or 64-bit integer or 32-bit float values
+1. `idx`: tensor of 32-bit integer or 64-bit integer values
+
+### tfl.unpack (TFL::UnpackOp)
 Unpacks a tensor along a dimension into multiple tensors
 
-### Description:
+#### Description:
 
 Unpacks a given dimension of a rank-`R` tensor into `num` rank-`(R-1)` tensors.
 
@@ -1579,30 +2329,50 @@ Etc.
 
 This is the opposite of `pack`.
 
-### Operands:
-1. `input`: tensor of 32-bit float or 8-bit integer or 32-bit integer values
+#### Operands:
+1. `input`: tensor of 32-bit float or 8-bit integer or 32-bit integer or QI8 type or QUI8 type values
 
-### Attributes:
+#### Attributes:
 | Attribute | MLIR Type | Description |
 | :-------: | :-------: | ----------- |
 | `num` | `IntegerAttr` | 32-bit integer attribute attribute |
 | `axis` | `IntegerAttr` | 32-bit integer attribute attribute |
 
-### Results:
-1. `outputs`: tensor of 32-bit float or 8-bit integer or 32-bit integer values
+#### Results:
+1. `outputs`: tensor of 32-bit float or 8-bit integer or 32-bit integer or QI8 type or QUI8 type values
 
-## tfl.zeros_like (TFL::ZerosLikeOp)
+### tfl.where (TFL::WhereOp)
+Returns locations of nonzero / true values in a tensor.
+
+#### Description:
+
+This operation returns the coordinates of true elements in `condition`. The
+coordinates are returned in a 2-D tensor where the first dimension (rows)
+represents the number of true elements, and the second dimension (columns)
+represents the coordinates of the true elements. Keep in mind, the shape of
+the output tensor can vary depending on how many true values there are in
+`condition`. Indices are output in row-major order.
+
+#### Operands:
+1. `input`: tensor of 1-bit integer values
+
+#### Attributes:
+
+#### Results:
+1. `index`: tensor of 64-bit integer values
+
+### tfl.zeros_like (TFL::ZerosLikeOp)
 ZerosLike operator
 
-### Description:
+#### Description:
 
 Returns a tensor of zeros with the same shape and type as the input tensor.
 
-### Operands:
+#### Operands:
 1. `input`: tensor of any type values
 
-### Attributes:
+#### Attributes:
 
-### Results:
+#### Results:
 1. `output`: tensor of any type values
 
diff --git a/tensorflow/compiler/mlir/xla/BUILD b/tensorflow/compiler/mlir/xla/BUILD
index 1730cff0013..81b4a1e783e 100644
--- a/tensorflow/compiler/mlir/xla/BUILD
+++ b/tensorflow/compiler/mlir/xla/BUILD
@@ -104,6 +104,7 @@ cc_library(
         "transforms/legalize_tf.cc",
     ],
     deps = [
+        ":convert_op_folder",
         ":hlo",
         "//tensorflow/compiler/mlir/tensorflow",
         "//tensorflow/compiler/mlir/tensorflow:lower_tf_lib",
@@ -257,6 +258,7 @@ cc_library(
     ],
     includes = ["include"],
     deps = [
+        ":convert_op_folder",
         ":hlo_ops_base_inc_gen",
         ":hlo_ops_inc_gen",
         ":xla_canonicalize_inc_gen",
@@ -470,3 +472,12 @@ genrule(
            " -o $@"),
     tools = [":operator_writer_gen"],
 )
+
+cc_library(
+    name = "convert_op_folder",
+    srcs = ["convert_op_folder.cc"],
+    hdrs = ["convert_op_folder.h"],
+    deps = [
+        "@local_config_mlir//:IR",
+    ],
+)
diff --git a/tensorflow/compiler/mlir/xla/convert_op_folder.cc b/tensorflow/compiler/mlir/xla/convert_op_folder.cc
new file mode 100644
index 00000000000..d26bec292cc
--- /dev/null
+++ b/tensorflow/compiler/mlir/xla/convert_op_folder.cc
@@ -0,0 +1,84 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// This file defines helpers for converting the element type of ElementsAttr.
+
+#include "tensorflow/compiler/mlir/xla/convert_op_folder.h"
+
+#include "mlir/IR/Attributes.h"  // TF:local_config_mlir
+#include "mlir/IR/StandardTypes.h"  // TF:local_config_mlir
+#include "mlir/IR/TypeUtilities.h"  // TF:local_config_mlir
+
+namespace xla {
+
+mlir::ElementsAttr ConvertElementsAttr(const mlir::ElementsAttr& elements,
+                                       mlir::Type new_type) {
+  auto old_type = getElementTypeOrSelf(elements);
+  size_t bit_width = new_type.isBF16() ? 64 : new_type.getIntOrFloatBitWidth();
+
+  if (old_type.isa<mlir::FloatType>()) {
+    // mapValues always takes a function returning APInt, even when the output
+    // is actually float; float results are returned as their raw bit pattern.
+    using func_type = mlir::APInt(const llvm::APFloat&);
+    if (auto newFloatType = new_type.dyn_cast<mlir::FloatType>()) {
+      // Float -> Float: go through double, then round to the target semantics.
+      return elements.mapValues(
+          new_type, llvm::function_ref<func_type>(
+                        [&newFloatType](const llvm::APFloat& floatVal) {
+                          llvm::APFloat newDouble(
+                              mlir::FloatAttr::getValueAsDouble(floatVal));
+                          bool loses_info = false;
+                          newDouble.convert(newFloatType.getFloatSemantics(),
+                                            llvm::APFloat::rmNearestTiesToEven,
+                                            &loses_info);
+                          return newDouble.bitcastToAPInt();
+                        }));
+    }
+    // Float -> Int. NOTE(review): confirm negative/fractional inputs are OK.
+    return elements.mapValues(
+        new_type, llvm::function_ref<func_type>(
+                      [&bit_width](const llvm::APFloat& floatVal) {
+                        return llvm::APInt(
+                            bit_width,
+                            mlir::FloatAttr::getValueAsDouble(floatVal));
+                      }));
+  }
+
+  // old_type is not a float here; it is read as a sign-extended integer.
+  // As above, the mapValues callback must return APInt even when the output
+  // is actually float.
+  using func_type = llvm::APInt(const llvm::APInt&);
+  if (auto newFloatType = new_type.dyn_cast<mlir::FloatType>()) {
+    // Int -> Float: sign-extend, widen to double, then round to new_type.
+    return elements.mapValues(
+        new_type, llvm::function_ref<func_type>([&newFloatType](
+                                                    const llvm::APInt& intVal) {
+          llvm::APFloat newDouble(static_cast<double>(intVal.getSExtValue()));
+          bool loses_info = false;
+          newDouble.convert(newFloatType.getFloatSemantics(),
+                            llvm::APFloat::rmNearestTiesToEven, &loses_info);
+          return newDouble.bitcastToAPInt();
+        }));
+  }
+  // new_type is Integer
+  // Int -> Int: sign-extend the value and rebuild it at the new bit width.
+  return elements.mapValues(
+      new_type,
+      llvm::function_ref<func_type>([&bit_width](const llvm::APInt& intVal) {
+        return llvm::APInt(bit_width, intVal.getSExtValue());
+      }));
+}
+
+}  // namespace xla
diff --git a/tensorflow/compiler/mlir/xla/convert_op_folder.h b/tensorflow/compiler/mlir/xla/convert_op_folder.h
new file mode 100644
index 00000000000..1c3f75489f8
--- /dev/null
+++ b/tensorflow/compiler/mlir/xla/convert_op_folder.h
@@ -0,0 +1,31 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_MLIR_XLA_CONVERT_OP_FOLDER_H_
+#define TENSORFLOW_COMPILER_MLIR_XLA_CONVERT_OP_FOLDER_H_
+
+#include "mlir/IR/Attributes.h"  // TF:local_config_mlir
+#include "mlir/IR/StandardTypes.h"  // TF:local_config_mlir
+
+namespace xla {
+
+// Converts each element of `elements` to `new_type` and returns the result.
+// Requires the element type of `elements` and `new_type` to each be either an
+// integer or a float type.
+mlir::ElementsAttr ConvertElementsAttr(const mlir::ElementsAttr& elements,
+                                       mlir::Type new_type);
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_MLIR_XLA_CONVERT_OP_FOLDER_H_
diff --git a/tensorflow/compiler/mlir/xla/hlo_utils.cc b/tensorflow/compiler/mlir/xla/hlo_utils.cc
index b8c55929986..e8a4e82776e 100644
--- a/tensorflow/compiler/mlir/xla/hlo_utils.cc
+++ b/tensorflow/compiler/mlir/xla/hlo_utils.cc
@@ -19,6 +19,7 @@ limitations under the License.
 
 #include "mlir/IR/Attributes.h"  // TF:local_config_mlir
 #include "mlir/IR/StandardTypes.h"  // TF:local_config_mlir
+#include "mlir/IR/TypeUtilities.h"  // TF:local_config_mlir
 #include "tensorflow/compiler/xla/literal.h"
 
 namespace xla {
@@ -79,4 +80,62 @@ mlir::DenseIntElementsAttr CreateDenseIntElementsAttrFromVector(
       .cast<mlir::DenseIntElementsAttr>();
 }
 
+mlir::ElementsAttr ConvertElementsAttr(const mlir::ElementsAttr& elements,
+                                       mlir::Type new_type) {
+  auto old_type = getElementTypeOrSelf(elements);
+  size_t bit_width = new_type.isBF16() ? 64 : new_type.getIntOrFloatBitWidth();
+
+  if (old_type.isa<mlir::FloatType>()) {
+    // mapValues always takes a function returning APInt, even when the output
+    // is actually float.
+    using func_type = mlir::APInt(const llvm::APFloat&);
+    if (auto newFloatType = new_type.dyn_cast<mlir::FloatType>()) {
+      // Float -> Float
+      return elements.mapValues(
+          new_type, llvm::function_ref<func_type>(
+                        [&newFloatType](const llvm::APFloat& floatVal) {
+                          llvm::APFloat newDouble(
+                              mlir::FloatAttr::getValueAsDouble(floatVal));
+                          bool loses_info = false;
+                          newDouble.convert(newFloatType.getFloatSemantics(),
+                                            llvm::APFloat::rmNearestTiesToEven,
+                                            &loses_info);
+                          return newDouble.bitcastToAPInt();
+                        }));
+    }
+    // Float -> Int
+    return elements.mapValues(
+        new_type, llvm::function_ref<func_type>(
+                      [&bit_width](const llvm::APFloat& floatVal) {
+                        return llvm::APInt(
+                            bit_width,
+                            mlir::FloatAttr::getValueAsDouble(floatVal));
+                      }));
+  }
+
+  // old_type is Integer
+  // mapValues always takes a function returning APInt, even when the output
+  // is actually float.
+  using func_type = llvm::APInt(const llvm::APInt&);
+  if (auto newFloatType = new_type.dyn_cast<mlir::FloatType>()) {
+    // Int -> Float
+    return elements.mapValues(
+        new_type, llvm::function_ref<func_type>([&newFloatType](
+                                                    const llvm::APInt& intVal) {
+          llvm::APFloat newDouble(static_cast<double>(intVal.getSExtValue()));
+          bool loses_info = false;
+          newDouble.convert(newFloatType.getFloatSemantics(),
+                            llvm::APFloat::rmNearestTiesToEven, &loses_info);
+          return newDouble.bitcastToAPInt();
+        }));
+  }
+  // new_type is Integer
+  // Int -> Int
+  return elements.mapValues(
+      new_type,
+      llvm::function_ref<func_type>([&bit_width](const llvm::APInt& intVal) {
+        return llvm::APInt(bit_width, intVal.getSExtValue());
+      }));
+}
+
 }  // namespace xla
diff --git a/tensorflow/compiler/mlir/xla/hlo_utils.h b/tensorflow/compiler/mlir/xla/hlo_utils.h
index b3bd727a28a..d8715a8e450 100644
--- a/tensorflow/compiler/mlir/xla/hlo_utils.h
+++ b/tensorflow/compiler/mlir/xla/hlo_utils.h
@@ -77,6 +77,11 @@ StatusOr<mlir::Type> ConvertShapeToType(const Shape& shape,
   return ConvertTensorShapeToType<TypeT>(shape, builder);
 }
 
+// Converts the given elements attr to the specified elements type.
+// Requires type of the elements and new_type to be either integer or float
+// type.
+mlir::ElementsAttr ConvertElementsAttr(const mlir::ElementsAttr& elements,
+                                       mlir::Type new_type);
 }  // namespace xla
 
 #endif  // TENSORFLOW_COMPILER_MLIR_XLA_HLO_UTILS_H_
diff --git a/tensorflow/compiler/mlir/xla/ir/hlo_ops.cc b/tensorflow/compiler/mlir/xla/ir/hlo_ops.cc
index ec139aca1d3..7f50b86fbd6 100644
--- a/tensorflow/compiler/mlir/xla/ir/hlo_ops.cc
+++ b/tensorflow/compiler/mlir/xla/ir/hlo_ops.cc
@@ -44,6 +44,7 @@ limitations under the License.
 #include "mlir/IR/Types.h"  // TF:local_config_mlir
 #include "mlir/IR/Value.h"  // TF:local_config_mlir
 #include "mlir/Support/LogicalResult.h"  // TF:local_config_mlir
+#include "tensorflow/compiler/mlir/xla/convert_op_folder.h"
 #include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h.inc"
 
 namespace mlir {
@@ -246,69 +247,13 @@ void ConvertOp::build(Builder* builder, OperationState& result, Value* operand,
   build(builder, result, result_ty, operand);
 }
 
-namespace {
-
-// Converts the values of an ElementsAttr into the corresponding type.
-ElementsAttr ConvertElements(const ElementsAttr& elements, Type newType) {
-  auto oldType = getElementTypeOrSelf(elements);
-  size_t bitWidth = newType.isBF16() ? 64 : newType.getIntOrFloatBitWidth();
-
-  if (oldType.isa<FloatType>()) {
-    // mapValues always takes a function returning APInt, even when the output
-    // is actually float.
-    using func_type = APInt(const APFloat&);
-    if (auto newFloatType = newType.dyn_cast<FloatType>()) {
-      // Float -> Float
-      return elements.mapValues(
-          newType, llvm::function_ref<func_type>([&newFloatType](
-                                                     const APFloat& floatVal) {
-            APFloat newDouble(FloatAttr::getValueAsDouble(floatVal));
-            bool losesInfo = false;
-            newDouble.convert(newFloatType.getFloatSemantics(),
-                              llvm::APFloat::rmNearestTiesToEven, &losesInfo);
-            return newDouble.bitcastToAPInt();
-          }));
-    }
-    // Float -> Int
-    return elements.mapValues(
-        newType,
-        llvm::function_ref<func_type>([&bitWidth](const APFloat& floatVal) {
-          return APInt(bitWidth, FloatAttr::getValueAsDouble(floatVal));
-        }));
-  }
-
-  // oldType is Integer
-  // mapValues always takes a function returning APInt, even when the output
-  // is actually float.
-  using func_type = APInt(const APInt&);
-  if (auto newFloatType = newType.dyn_cast<FloatType>()) {
-    // Int -> Float
-    return elements.mapValues(
-        newType,
-        llvm::function_ref<func_type>([&newFloatType](const APInt& intVal) {
-          APFloat newDouble(static_cast<double>(intVal.getSExtValue()));
-          bool losesInfo = false;
-          newDouble.convert(newFloatType.getFloatSemantics(),
-                            llvm::APFloat::rmNearestTiesToEven, &losesInfo);
-          return newDouble.bitcastToAPInt();
-        }));
-  }
-  // newType is Integer
-  // Int -> Int
-  return elements.mapValues(
-      newType, llvm::function_ref<func_type>([&bitWidth](const APInt& intVal) {
-        return APInt(bitWidth, intVal.getSExtValue());
-      }));
-}
-
-}  // namespace
-
 OpFoldResult ConvertOp::fold(ArrayRef<Attribute> operands) {
   if (getOperand()->getType() == getResult()->getType()) return getOperand();
 
   // If the operand is constant, we can do the conversion now.
   if (auto elementsAttr = operands.front().dyn_cast_or_null<ElementsAttr>()) {
-    return ConvertElements(elementsAttr, getElementTypeOrSelf(getResult()));
+    return xla::ConvertElementsAttr(elementsAttr,
+                                    getElementTypeOrSelf(getResult()));
   }
 
   return {};
diff --git a/tensorflow/compiler/mlir/xla/ir/hlo_ops.td b/tensorflow/compiler/mlir/xla/ir/hlo_ops.td
index 48f2a0212be..6e110e4eb0b 100644
--- a/tensorflow/compiler/mlir/xla/ir/hlo_ops.td
+++ b/tensorflow/compiler/mlir/xla/ir/hlo_ops.td
@@ -490,6 +490,20 @@ def HLO_ConcatenateOp : HLO_Op<"concatenate",
   let hasCustomHLOConverter = 1;
 }
 
+def HLO_CrossReplicaSumOp : HLO_Op<"cross-replica-sum",
+    [NoSideEffect, SameOperandsAndResultType]>, BASE_HLO_CrossReplicaSumOp {
+
+  let arguments = (ins
+    HLO_Tensor:$operand,
+    I64ElementsAttr:$replica_groups
+  );
+
+  let results = (outs HLO_Tensor);
+
+  // TODO(b/129422361) CrossReplicaSumOp has special conversion logic to HLO.
+  let hasCustomHLOConverter = 1;
+}
+
 // TODO(hinsu): Make this struct dialect independent so that it can be shared
 // between HLO and LHLO dialect.
 def ConvDimensionNumbers : StructAttr<"ConvDimensionNumbers", HLO_Dialect, [
diff --git a/tensorflow/compiler/mlir/xla/ir/hlo_ops_base.td b/tensorflow/compiler/mlir/xla/ir/hlo_ops_base.td
index 7bfbd204540..6620b591978 100644
--- a/tensorflow/compiler/mlir/xla/ir/hlo_ops_base.td
+++ b/tensorflow/compiler/mlir/xla/ir/hlo_ops_base.td
@@ -547,6 +547,22 @@ class BASE_HLO_ConcatenateOp {
    }];
 }
 
+class BASE_HLO_CrossReplicaSumOp {
+   string summary = "Sums input across replicated instances.";
+
+   string description = [{
+     For each of the replica groups, operands of the group devices are summed
+     so that each device has the sum.
+
+     For example, suppose there are 8 TPU devices: `[A, B, C, D, E, F, G, H]`.
+     Passing group_assignment=`[[0,2,4,6],[1,3,5,7]]` sets `A, C, E, G` as group 0,
+     and `B, D, F, H` as group 1. Thus we get the outputs:
+     `[A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H, A+C+E+G, B+D+F+H]`.
+
+     See https://www.tensorflow.org/xla/operation_semantics#crossreplicasum.
+   }];
+}
+
 class BASE_HLO_ConvOp {
   string summary = "Convolution operator";
 
diff --git a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc
index 42706c53a60..dcb34c0f0a9 100644
--- a/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc
+++ b/tensorflow/compiler/mlir/xla/mlir_hlo_to_hlo.cc
@@ -362,6 +362,10 @@ LogicalResult ExportXlaOp(CopyOp op, OpLoweringContext ctx) {
   return failure();
 }
 
+LogicalResult ExportXlaOp(CrossReplicaSumOp op, OpLoweringContext ctx) {
+  return failure();
+}
+
 LogicalResult ExportXlaOp(DynamicSliceOp op, OpLoweringContext ctx) {
   return failure();
 }
diff --git a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir
index 4828a92fc4e..8dc276428d7 100644
--- a/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir
+++ b/tensorflow/compiler/mlir/xla/tests/legalize-tf.mlir
@@ -1670,3 +1670,15 @@ func @conv2d_backprop_filter(
   } : (tensor<100x28x28x1xf32>, tensor<4xi32>, tensor<100x26x26x32xf32>) -> tensor<100x28x28x1xf32>
   return %result : tensor<100x28x28x1xf32>
 }
+
+// CHECK-LABEL: @cross_replica_sum
+func @cross_replica_sum(%input: tensor<10xf32>) -> tensor<10xf32> {
+  %replica_groups = "tf.Const" () {
+    value = dense<[[0, 2, 4, 6], [1, 3, 5, 7]]> : tensor<2x4xi32>
+  } : () -> tensor<2x4xi32>
+
+  // CHECK: xla_hlo.cross-replica-sum
+  // CHECK-SAME: replica_groups = dense<{{\[}}[0, 2, 4, 6], [1, 3, 5, 7]]> : tensor<2x4xi64>
+  %result = "tf.CrossReplicaSum" (%input, %replica_groups) : (tensor<10xf32>, tensor<2x4xi32>) -> tensor<10xf32>
+  return %result : tensor<10xf32>
+}
diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc
index de7b1f955c2..444f19c8358 100644
--- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc
+++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf.cc
@@ -38,6 +38,7 @@ limitations under the License.
 #include "mlir/Transforms/DialectConversion.h"  // TF:local_config_mlir
 #include "tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h"
 #include "tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.h"
+#include "tensorflow/compiler/mlir/xla/convert_op_folder.h"
 #include "tensorflow/compiler/mlir/xla/ir/hlo_ops.h"
 #include "tensorflow/compiler/mlir/xla/transforms/passes.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
diff --git a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td
index 3d06d590496..72e194ad3ff 100644
--- a/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td
+++ b/tensorflow/compiler/mlir/xla/transforms/legalize_tf_patterns.td
@@ -228,6 +228,18 @@ def : Pat<(TF_ConcatV2Op $inputs, (TF_ConstOp OneElementAttr:$axis), $unused),
           (HLO_ConcatenateOp $inputs, (GetHLOAxisFromTFAxis $axis, $inputs)),
           [(HasRankedFirstOperand $inputs)]>;
 
+//===----------------------------------------------------------------------===//
+// CrossReplicaSum op patterns.
+//===----------------------------------------------------------------------===//
+
+def CastElementsToI64Elements : NativeCodeCall<
+  "::xla::ConvertElementsAttr("
+    "$0, $_builder.getIntegerType(64)).cast<DenseIntElementsAttr>()">;
+
+def : Pat<(TF_CrossReplicaSumOp $input, (TF_ConstOp $group_assignment)),
+          (HLO_CrossReplicaSumOp $input,
+            (CastElementsToI64Elements $group_assignment))>;
+
 //===----------------------------------------------------------------------===//
 // Fft op patterns.
 //===----------------------------------------------------------------------===//
diff --git a/tensorflow/compiler/xla/service/call_inliner.cc b/tensorflow/compiler/xla/service/call_inliner.cc
index 062110af867..d78d370f7c7 100644
--- a/tensorflow/compiler/xla/service/call_inliner.cc
+++ b/tensorflow/compiler/xla/service/call_inliner.cc
@@ -145,7 +145,12 @@ StatusOr<bool> CallInliner::Run(HloModule* module) {
       call_graph->VisitNodes([&](const CallGraphNode& node) -> Status {
         for (const CallSite& callsite : node.caller_callsites()) {
           VLOG(1) << "Visiting callsite: " << callsite.ToString();
-          if (callsite.instruction()->opcode() == HloOpcode::kCall) {
+          bool callsite_alive =
+              absl::c_any_of(node.callers(), [&](HloComputation* caller) {
+                return caller->ContainsInstruction(callsite.instruction());
+              });
+          if (callsite.instruction()->opcode() == HloOpcode::kCall &&
+              callsite_alive) {
             HloInstruction* call = callsite.instruction();
             TF_RETURN_IF_ERROR(Inline(call).status());
             did_mutate = true;
diff --git a/tensorflow/compiler/xla/service/call_inliner_test.cc b/tensorflow/compiler/xla/service/call_inliner_test.cc
index 0b6e323f75c..02f43ba70c7 100644
--- a/tensorflow/compiler/xla/service/call_inliner_test.cc
+++ b/tensorflow/compiler/xla/service/call_inliner_test.cc
@@ -142,6 +142,46 @@ TEST_F(CallInlinerTest, InlineWithoutRunningPass) {
               ElementsAre(op::Constant()));
 }
 
+// Test that inlining can work with computations with a dead parameter.
+TEST_F(CallInlinerTest, InlineWithEmptyComputation) {
+  const Shape pred = ShapeUtil::MakeShape(PRED, {});
+  auto module = CreateNewVerifiedModule();
+  Shape r0s32 = ShapeUtil::MakeShape(S32, {});
+  HloComputation::Builder empty(TestName() + ".empty");
+  empty.AddInstruction(HloInstruction::CreateParameter(0, r0s32, "A"));
+  empty.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<int32>(0)));
+  HloComputation* empty_computation =
+      module->AddEmbeddedComputation(empty.Build());
+
+  HloComputation::Builder empty2(TestName() + ".empty");
+  empty2.AddInstruction(HloInstruction::CreateParameter(0, r0s32, "A"));
+  empty2.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<int32>(0)));
+  HloComputation* empty2_computation =
+      module->AddEmbeddedComputation(empty2.Build());
+
+  HloComputation::Builder entry("entry");
+  auto zero = entry.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<int32>(0)));
+  // The order of the call chain is crafted to test a specific pattern such
+  // that the third call instruction will be flattened before the second one
+  // (which makes the second call instruction dead before it is flattened).
+  entry.AddInstruction(
+      HloInstruction::CreateCall(r0s32, {zero}, empty_computation));
+  HloInstruction* call1 = entry.AddInstruction(
+      HloInstruction::CreateCall(r0s32, {zero}, empty2_computation));
+  entry.AddInstruction(
+      HloInstruction::CreateCall(r0s32, {call1}, empty_computation));
+  auto computation = module->AddEntryComputation(entry.Build());
+
+  CallInliner call_inliner;
+  TF_ASSERT_OK_AND_ASSIGN(bool mutated, call_inliner.Run(module.get()));
+  ASSERT_TRUE(mutated);
+
+  EXPECT_THAT(computation->root_instruction(), op::Constant());
+}
+
 TEST_F(CallInlinerTest, CallToOutfeedComputationIsInlined) {
   const Shape f32 = ShapeUtil::MakeShape(F32, {});
   auto module = CreateNewVerifiedModule();
diff --git a/tensorflow/lite/experimental/micro/micro_allocator.cc b/tensorflow/lite/experimental/micro/micro_allocator.cc
index a29923d9757..02c8920a1e6 100644
--- a/tensorflow/lite/experimental/micro/micro_allocator.cc
+++ b/tensorflow/lite/experimental/micro/micro_allocator.cc
@@ -20,6 +20,7 @@ limitations under the License.
 #include "tensorflow/lite/core/api/tensor_utils.h"
 #include "tensorflow/lite/experimental/micro/memory_helpers.h"
 #include "tensorflow/lite/experimental/micro/memory_planner/greedy_memory_planner.h"
+#include "tensorflow/lite/experimental/micro/simple_memory_allocator.h"
 
 namespace tflite {
 
@@ -89,15 +90,29 @@ TfLiteStatus MicroAllocator::RegisterPreallocatedInput(uint8_t* buffer,
 TfLiteStatus MicroAllocator::AllocateTensors() {
   const size_t tensors_size = tensors_->size();
 
-  // It would be better not to allocate this memory for the lifetime of the
-  // model, but we don't have a straightforward way to avoid it.
-  TensorInfo* tensor_info =
-      reinterpret_cast<TensorInfo*>(memory_allocator_.AllocateFromTail(
-          sizeof(TensorInfo) * tensors_size, sizeof(TensorInfo)));
-
   const flatbuffers::Vector<flatbuffers::Offset<Buffer>>* buffers =
       model_->buffers();
 
+  // Initialize runtime tensors.
+  for (size_t i = 0; i < tensors_size; ++i) {
+    auto* runtime_tensor = &context_->tensors[i];
+    auto* flatbuffer_tensor = tensors_->Get(i);
+
+    // Preallocated inputs have already been set up earlier, so skip them.
+    const bool is_preallocated_input = (runtime_tensor->data.raw != nullptr);
+    if (!is_preallocated_input) {
+      TF_LITE_ENSURE_STATUS(InitializeRuntimeTensor(*flatbuffer_tensor, buffers,
+                                                    error_reporter_,
+                                                    runtime_tensor, nullptr));
+    }
+  }
+
+  // tensor_info is only used in this function.
+  auto tmp_allocator = memory_allocator_.CreateChildAllocator();
+  TensorInfo* tensor_info =
+      reinterpret_cast<TensorInfo*>(tmp_allocator.AllocateFromTail(
+          sizeof(TensorInfo) * tensors_size, sizeof(TensorInfo)));
+
   // Set up the runtime data structures for all tensors.
   for (size_t i = 0; i < tensors_size; ++i) {
     TensorInfo* current = &tensor_info[i];
@@ -112,14 +127,6 @@ TfLiteStatus MicroAllocator::AllocateTensors() {
       current->last_used = -1;
     }
     current->needs_allocating = false;
-    // Preallocated inputs have already been set up earlier, so skip them.
-    const bool is_preallocated_input =
-        (current->runtime_tensor->data.raw != nullptr);
-    if (!is_preallocated_input) {
-      TF_LITE_ENSURE_STATUS(InitializeRuntimeTensor(
-          *current->flatbuffer_tensor, buffers, error_reporter_,
-          current->runtime_tensor, nullptr));
-    }
   }
 
   // First go through the inputs and figure out if they need to be allocated.
@@ -181,8 +188,9 @@ TfLiteStatus MicroAllocator::AllocateTensors() {
   uint8_t* aligned_arena = AlignPointerUp(arena_, kBufferAlignment);
   const size_t alignment_loss = (aligned_arena - arena_);
 
+  // Remaining arena size that memory planner can use for calculating offsets.
   int remaining_arena_size =
-      arena_size_ - (memory_allocator_.GetDataSize() + alignment_loss);
+      arena_size_ - (tmp_allocator.GetDataSize() + alignment_loss);
   GreedyMemoryPlanner planner(aligned_arena, remaining_arena_size);
 
   // Add the tensors to our allocation plan.
@@ -201,8 +209,12 @@ TfLiteStatus MicroAllocator::AllocateTensors() {
     }
   }
 
+  // Actual size available for placing tensors. This includes memory held by the
+  // tensor info array, which will be released.
+  int actual_available_arena_size =
+      arena_size_ - (memory_allocator_.GetDataSize() + alignment_loss);
   // Make sure we have enough room.
-  if (planner.GetMaximumMemorySize() > remaining_arena_size) {
+  if (planner.GetMaximumMemorySize() > actual_available_arena_size) {
     error_reporter_->Report(
         "Arena size is too small for activation buffers. Needed %d but only %d "
         "was available.",
diff --git a/tensorflow/lite/experimental/micro/simple_memory_allocator.cc b/tensorflow/lite/experimental/micro/simple_memory_allocator.cc
index a9e8c228fe1..5a0fca52442 100644
--- a/tensorflow/lite/experimental/micro/simple_memory_allocator.cc
+++ b/tensorflow/lite/experimental/micro/simple_memory_allocator.cc
@@ -22,6 +22,10 @@ namespace tflite {
 
 uint8_t* SimpleMemoryAllocator::AllocateFromTail(size_t size,
                                                  size_t alignment) {
+  if (has_child_allocator_) {
+    // TODO(wangtz): Add error reporting when the parent allocator is locked!
+    return nullptr;
+  }
   uint8_t* previous_free = (data_ + data_size_max_) - data_size_;
   uint8_t* current_data = previous_free - size;
   uint8_t* aligned_result = AlignPointerDown(current_data, alignment);
@@ -34,4 +38,21 @@ uint8_t* SimpleMemoryAllocator::AllocateFromTail(size_t size,
   return aligned_result;
 }
 
+SimpleMemoryAllocator SimpleMemoryAllocator::CreateChildAllocator() {
+  // Note that the parameterized constructor initializes data_size_ to 0 which
+  // is not what we expected.
+  SimpleMemoryAllocator child = *this;
+  child.parent_allocator_ = this;
+  // With C++ copy elision, &child should be available after return.
+  has_child_allocator_ = true;
+  return child;
+}
+
+SimpleMemoryAllocator::~SimpleMemoryAllocator() {
+  // Root allocator doesn't have a parent.
+  if (nullptr != parent_allocator_) {
+    parent_allocator_->has_child_allocator_ = false;
+  }
+}
+
 }  // namespace tflite
diff --git a/tensorflow/lite/experimental/micro/simple_memory_allocator.h b/tensorflow/lite/experimental/micro/simple_memory_allocator.h
index f44c012e0e2..a7243c827e8 100644
--- a/tensorflow/lite/experimental/micro/simple_memory_allocator.h
+++ b/tensorflow/lite/experimental/micro/simple_memory_allocator.h
@@ -28,7 +28,7 @@ namespace tflite {
 class SimpleMemoryAllocator {
  public:
   SimpleMemoryAllocator(uint8_t* buffer, size_t buffer_size)
-      : data_size_(0), data_size_max_(buffer_size), data_(buffer) {}
+      : data_size_max_(buffer_size), data_(buffer) {}
 
   // Allocates memory starting at the end of the arena (highest address and
   // moving downwards, so that tensor buffers can be allocated from the start
@@ -37,10 +37,25 @@ class SimpleMemoryAllocator {
 
   int GetDataSize() const { return data_size_; }
 
+  // Child allocator is something like a temporary allocator. Memory allocated
+  // by the child allocator will be freed once the child allocator is
+  // deallocated. Child allocator could be cascaded to have for example
+  // grandchild allocator. But at any given time, only the latest child
+  // allocator can be used. All its ancestors will be locked to avoid memory
+  // corruption. Locked means that the allocator can't allocate memory.
+  // WARNING: Parent allocator needs to live longer than the child allocator.
+  SimpleMemoryAllocator CreateChildAllocator();
+
+  // Unlocks parent allocator when the child allocator is destroyed.
+  ~SimpleMemoryAllocator();
+
  private:
-  int data_size_;
+  int data_size_ = 0;
   size_t data_size_max_;
   uint8_t* data_;
+  SimpleMemoryAllocator* parent_allocator_ = nullptr;
+  // The allocator is locked if it has a child.
+  bool has_child_allocator_ = false;
 };
 
 }  // namespace tflite
diff --git a/tensorflow/lite/experimental/micro/simple_memory_allocator_test.cc b/tensorflow/lite/experimental/micro/simple_memory_allocator_test.cc
index 58aa394878e..152a908f227 100644
--- a/tensorflow/lite/experimental/micro/simple_memory_allocator_test.cc
+++ b/tensorflow/lite/experimental/micro/simple_memory_allocator_test.cc
@@ -56,4 +56,32 @@ TF_LITE_MICRO_TEST(TestMultipleTooLarge) {
   TF_LITE_MICRO_EXPECT_EQ(nullptr, result);
 }
 
+TF_LITE_MICRO_TEST(TestChildAllocator) {
+  constexpr size_t arena_size = 1024;
+  uint8_t arena[arena_size];
+  tflite::SimpleMemoryAllocator allocator(arena, arena_size);
+
+  uint8_t* first = allocator.AllocateFromTail(16, 4);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, first);
+
+  {
+    auto child_allocator = allocator.CreateChildAllocator();
+    uint8_t* second = child_allocator.AllocateFromTail(16, 4);
+    TF_LITE_MICRO_EXPECT_EQ(second, first - 16);
+
+    auto grand_child_allocator = child_allocator.CreateChildAllocator();
+    uint8_t* third = grand_child_allocator.AllocateFromTail(15, 4);
+    TF_LITE_MICRO_EXPECT_EQ(third, second - 16);
+
+    // Parent allocator is locked.
+    TF_LITE_MICRO_EXPECT_EQ(nullptr, allocator.AllocateFromTail(16, 4));
+    TF_LITE_MICRO_EXPECT_EQ(nullptr, child_allocator.AllocateFromTail(16, 4));
+  }
+
+  // Parent allocator is unlocked.
+  auto child_allocator = allocator.CreateChildAllocator();
+  uint8_t* fourth = child_allocator.AllocateFromTail(16, 4);
+  TF_LITE_MICRO_EXPECT_EQ(fourth, first - 16);
+}
+
 TF_LITE_MICRO_TESTS_END
diff --git a/tensorflow/lite/kernels/add.cc b/tensorflow/lite/kernels/add.cc
index 17214d53ea1..6bc88c37fdc 100644
--- a/tensorflow/lite/kernels/add.cc
+++ b/tensorflow/lite/kernels/add.cc
@@ -212,7 +212,7 @@ void EvalAdd(TfLiteContext* context, TfLiteNode* node, TfLiteAddParams* params,
       }
     } else {
       if (need_broadcast) {
-        TF_LITE_ADD(optimized_ops, BroadcastAddFivefold, float);
+        TF_LITE_ADD(optimized_ops, BroadcastAddDispatch, float);
       } else {
         TF_LITE_ADD(optimized_ops, Add, float);
       }
@@ -256,11 +256,8 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
           TF_LITE_ADD(reference_integer_ops, Add, int8_t);
         }
       } else {
-        if (op_params.broadcast_category ==
-            BroadcastableOpCategory::kGenericBroadcast) {
-          TF_LITE_ADD(reference_integer_ops, BroadcastAdd4DSlow, int8_t);
-        } else if (need_broadcast) {
-          TF_LITE_ADD(optimized_integer_ops, BroadcastAddFivefold, int8_t);
+        if (need_broadcast) {
+          TF_LITE_ADD(optimized_integer_ops, BroadcastAddDispatch, int8_t);
         } else {
           TF_LITE_ADD(optimized_integer_ops, Add, int8_t);
         }
@@ -273,11 +270,8 @@ TfLiteStatus EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
           TF_LITE_ADD(reference_ops, Add, uint8_t);
         }
       } else {
-        if (op_params.broadcast_category ==
-            BroadcastableOpCategory::kGenericBroadcast) {
-          TF_LITE_ADD(optimized_ops, BroadcastAdd4DSlow, uint8_t);
-        } else if (need_broadcast) {
-          TF_LITE_ADD(optimized_ops, BroadcastAddFivefold, uint8_t);
+        if (need_broadcast) {
+          TF_LITE_ADD(optimized_ops, BroadcastAddDispatch, uint8_t);
         } else {
           TF_LITE_ADD(optimized_ops, Add, uint8_t);
         }
diff --git a/tensorflow/lite/kernels/add_test.cc b/tensorflow/lite/kernels/add_test.cc
index ef97b7785e1..267b80564c9 100644
--- a/tensorflow/lite/kernels/add_test.cc
+++ b/tensorflow/lite/kernels/add_test.cc
@@ -139,6 +139,68 @@ TEST(FloatAddOpModel, WithBroadcast) {
   }
 }
 
+TEST(FloatAddOpModel, WithBroadcastGeneric) {
+  std::vector<int> test_shape1 = {1, 3, 1};
+  std::vector<int> test_shape2 = {2, 1, 2};
+  FloatAddOpModel m({TensorType_FLOAT32, test_shape1},
+                    {TensorType_FLOAT32, test_shape2}, {TensorType_FLOAT32, {}},
+                    ActivationFunctionType_NONE);
+  m.PopulateTensor<float>(m.input1(), {0.1, 0.2, 0.3});
+  m.PopulateTensor<float>(m.input2(), {0.1, 0.2, 0.3, 0.4});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(),
+              ElementsAreArray(ArrayFloatNear({0.2, 0.3, 0.3, 0.4, 0.4, 0.5,
+                                               0.4, 0.5, 0.5, 0.6, 0.6, 0.7})));
+}
+
+TEST(FloatAddOpModel, MixedBroadcast) {
+  const std::vector<int> base_shape = {2, 3, 1, 2};
+  std::vector<std::vector<int>> test_shapes = {
+      {1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+  std::vector<std::vector<float>> test_outputs = {
+      {-0.1f, 2.6f,  -0.7f, 2.8f, 0.7f,  3.2f, 1.1f,  0.8f, 0.5f,
+       1.0f,  1.9f,  1.4f,  1.0f, -0.8f, 0.4f, -0.6f, 1.8f, -0.2f,
+       1.4f,  3.1f,  0.8f,  3.3f, 2.2f,  3.7f, -1.4f, 0.3f, -2.0f,
+       0.5f,  -0.6f, 0.9f,  0.9f, -1.9f, 0.3f, -1.7f, 1.7f, -1.3f},
+      {-0.1f, 2.6f, 0.5f, 1.0f, 1.8f, -0.2f, 1.4f, 3.1f, -2.0f, 0.5f, 1.7f,
+       -1.3f},
+      {-0.1f, 2.5f,  0.0f, 2.6f, -0.7f, 1.9f, 1.1f,  0.7f, 1.2f,
+       0.8f,  0.5f,  0.1f, 1.0f, -0.9f, 1.1f, -0.8f, 0.4f, -1.5f,
+       1.7f,  3.3f,  2.2f, 3.8f, 2.1f,  3.7f, -1.1f, 0.5f, -0.6f,
+       1.0f,  -0.7f, 0.9f, 1.2f, -1.7f, 1.7f, -1.2f, 1.6f, -1.3f},
+      {-0.1f, 2.5f, 1.2f, 0.8f, 0.4f, -1.5f, 1.7f, 3.3f, -0.6f, 1.0f, 1.6f,
+       -1.3f}};
+  for (size_t i = 0; i < test_shapes.size(); ++i) {
+    FloatAddOpModel model_fixture(
+        {TensorType_FLOAT32, base_shape}, {TensorType_FLOAT32, test_shapes[i]},
+        {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE);
+    model_fixture.PopulateTensor<float>(
+        model_fixture.input1(), {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, 1.2f,
+                                 2.8f, -1.6f, 0.0f, 0.7f, -2.2f});
+    model_fixture.PopulateTensor<float>(model_fixture.input2(),
+                                        {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f});
+    model_fixture.Invoke();
+    EXPECT_THAT(model_fixture.GetOutput(),
+                ElementsAreArray(ArrayFloatNear(test_outputs[i], 0.0001f)))
+        << "With shape number " << i;
+  }
+  // Re-run with exchanged inputs.
+  for (size_t i = 0; i < test_shapes.size(); ++i) {
+    FloatAddOpModel model_fixture(
+        {TensorType_FLOAT32, test_shapes[i]}, {TensorType_FLOAT32, base_shape},
+        {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE);
+    model_fixture.PopulateTensor<float>(model_fixture.input1(),
+                                        {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f});
+    model_fixture.PopulateTensor<float>(
+        model_fixture.input2(), {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, 1.2f,
+                                 2.8f, -1.6f, 0.0f, 0.7f, -2.2f});
+    model_fixture.Invoke();
+    EXPECT_THAT(model_fixture.GetOutput(),
+                ElementsAreArray(ArrayFloatNear(test_outputs[i], 0.0001f)))
+        << "With shape number " << i;
+  }
+}
+
 TEST(IntegerAddOpModel, NoActivation) {
   IntegerAddOpModel m({TensorType_INT32, {1, 2, 2, 1}},
                       {TensorType_INT32, {1, 2, 2, 1}}, {TensorType_INT32, {}},
@@ -435,5 +497,31 @@ TEST(QuantizedAddOpModel, QuantizedWithMixedBroadcastInt8) {
   QuantizedWithMixedBroadcast<TensorType_INT8, int8_t>();
 }
 
+template <enum TensorType tensor_type, typename integer_dtype>
+void QuantizedWithGenericBroadcast() {
+  float kQuantizedTolerance = GetTolerance(-1.0, 1.0);
+  std::vector<int> test_shape1 = {1, 3, 1};
+  std::vector<int> test_shape2 = {2, 1, 2};
+  QuantizedAddOpModel m({tensor_type, test_shape1, -1.0, 1.0},
+                        {tensor_type, test_shape2, -1.0, 1.0},
+                        {tensor_type, {}, -1.0, 1.0},
+                        ActivationFunctionType_NONE);
+  m.QuantizeAndPopulate<integer_dtype>(m.input1(), {0.1, 0.2, 0.3});
+  m.QuantizeAndPopulate<integer_dtype>(m.input2(), {0.1, -0.2, 0.3, -0.4});
+  m.Invoke();
+  EXPECT_THAT(m.GetDequantizedOutput<integer_dtype>(),
+              ElementsAreArray(ArrayFloatNear({0.2, -0.1, 0.3, 0., 0.4, 0.1,
+                                               0.4, -0.3, 0.5, -0.2, 0.6, -0.1},
+                                              kQuantizedTolerance)));
+}
+
+TEST(QuantizedAddOpModel, QuantizedWithGenericBroadcastUInt8) {
+  QuantizedWithGenericBroadcast<TensorType_UINT8, uint8_t>();
+}
+
+TEST(QuantizedAddOpModel, QuantizedWithGenericdBroadcastInt8) {
+  QuantizedWithGenericBroadcast<TensorType_INT8, int8_t>();
+}
+
 }  // namespace
 }  // namespace tflite
diff --git a/tensorflow/lite/kernels/internal/optimized/integer_ops/add.h b/tensorflow/lite/kernels/internal/optimized/integer_ops/add.h
index 253944ca3f1..2c4a86b5f15 100644
--- a/tensorflow/lite/kernels/internal/optimized/integer_ops/add.h
+++ b/tensorflow/lite/kernels/internal/optimized/integer_ops/add.h
@@ -18,6 +18,7 @@ limitations under the License.
 #include "profiling/instrumentation.h"
 #include "tensorflow/lite/kernels/internal/common.h"
 #include "tensorflow/lite/kernels/internal/optimized/cpu_check.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/add.h"
 #include "tensorflow/lite/kernels/internal/types.h"
 
 namespace tflite {
@@ -325,6 +326,23 @@ inline void BroadcastAddFivefold(const ArithmeticParams& unswitched_params,
   }
 }
 
+inline void BroadcastAddDispatch(const ArithmeticParams& params,
+                                 const RuntimeShape& input1_shape,
+                                 const int8* input1_data,
+                                 const RuntimeShape& input2_shape,
+                                 const int8* input2_data,
+                                 const RuntimeShape& output_shape,
+                                 int8* output_data) {
+  if (params.broadcast_category == BroadcastableOpCategory::kGenericBroadcast) {
+    return reference_integer_ops::BroadcastAdd4DSlow(
+        params, input1_shape, input1_data, input2_shape, input2_data,
+        output_shape, output_data);
+  }  // kGenericBroadcast is delegated to the reference 4D implementation.
+  // Every other broadcast category is forwarded to BroadcastAddFivefold.
+  BroadcastAddFivefold(params, input1_shape, input1_data, input2_shape,
+                       input2_data, output_shape, output_data);
+}
+
 }  // namespace optimized_integer_ops
 }  // namespace tflite
 
diff --git a/tensorflow/lite/kernels/internal/optimized/integer_ops/mul.h b/tensorflow/lite/kernels/internal/optimized/integer_ops/mul.h
index 74b9d4b6a9e..add455bd44e 100644
--- a/tensorflow/lite/kernels/internal/optimized/integer_ops/mul.h
+++ b/tensorflow/lite/kernels/internal/optimized/integer_ops/mul.h
@@ -18,6 +18,7 @@ limitations under the License.
 #include "profiling/instrumentation.h"
 #include "tensorflow/lite/kernels/internal/common.h"
 #include "tensorflow/lite/kernels/internal/optimized/cpu_check.h"
+#include "tensorflow/lite/kernels/internal/reference/integer_ops/mul.h"
 #include "tensorflow/lite/kernels/internal/types.h"
 
 namespace tflite {
@@ -251,6 +252,23 @@ inline void BroadcastMulFivefold(const ArithmeticParams& unswitched_params,
   }
 }
 
+inline void BroadcastMulDispatch(const ArithmeticParams& params,
+                                 const RuntimeShape& input1_shape,
+                                 const int8* input1_data,
+                                 const RuntimeShape& input2_shape,
+                                 const int8* input2_data,
+                                 const RuntimeShape& output_shape,
+                                 int8* output_data) {
+  if (params.broadcast_category == BroadcastableOpCategory::kGenericBroadcast) {
+    return reference_integer_ops::BroadcastMul4DSlow(
+        params, input1_shape, input1_data, input2_shape, input2_data,
+        output_shape, output_data);
+  }  // kGenericBroadcast is delegated to the reference 4D implementation.
+  // Every other broadcast category is forwarded to BroadcastMulFivefold.
+  BroadcastMulFivefold(params, input1_shape, input1_data, input2_shape,
+                       input2_data, output_shape, output_data);
+}
+
 }  // namespace optimized_integer_ops
 }  // namespace tflite
 
diff --git a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h
index d08e4f35239..257bd31c702 100644
--- a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h
+++ b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h
@@ -2108,6 +2108,20 @@ inline void BroadcastAddFivefold(const ArithmeticParams& params,
   }
 }
 
+template <typename T>
+inline void BroadcastAddDispatch(
+    const ArithmeticParams& params, const RuntimeShape& input1_shape,
+    const T* input1_data, const RuntimeShape& input2_shape,
+    const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
+  if (params.broadcast_category == BroadcastableOpCategory::kGenericBroadcast) {
+    return BroadcastAdd4DSlow(params, input1_shape, input1_data, input2_shape,
+                              input2_data, output_shape, output_data);
+  }  // kGenericBroadcast is delegated to BroadcastAdd4DSlow.
+  // Every other broadcast category is forwarded to BroadcastAddFivefold.
+  BroadcastAddFivefold(params, input1_shape, input1_data, input2_shape,
+                       input2_data, output_shape, output_data);
+}
+
 inline void MulElementwise(int size, const ArithmeticParams& params,
                            const float* input1_data, const float* input2_data,
                            float* output_data) {
@@ -2601,6 +2615,20 @@ inline void BroadcastMulFivefold(const ArithmeticParams& params,
   }
 }
 
+template <typename T>
+inline void BroadcastMulDispatch(
+    const ArithmeticParams& params, const RuntimeShape& input1_shape,
+    const T* input1_data, const RuntimeShape& input2_shape,
+    const T* input2_data, const RuntimeShape& output_shape, T* output_data) {
+  if (params.broadcast_category == BroadcastableOpCategory::kGenericBroadcast) {
+    return BroadcastMul4DSlow(params, input1_shape, input1_data, input2_shape,
+                              input2_data, output_shape, output_data);
+  }  // kGenericBroadcast is delegated to BroadcastMul4DSlow.
+  // Every other broadcast category is forwarded to BroadcastMulFivefold.
+  BroadcastMulFivefold(params, input1_shape, input1_data, input2_shape,
+                       input2_data, output_shape, output_data);
+}
+
 // TODO(jiawen): We can implement BroadcastDiv on buffers of arbitrary
 // dimensionality if the runtime code does a single loop over one dimension
 // that handles broadcasting as the base case. The code generator would then
diff --git a/tensorflow/lite/kernels/mul.cc b/tensorflow/lite/kernels/mul.cc
index 9feb1794076..9e3240c8775 100644
--- a/tensorflow/lite/kernels/mul.cc
+++ b/tensorflow/lite/kernels/mul.cc
@@ -146,7 +146,7 @@ void EvalMul(TfLiteContext* context, TfLiteNode* node, TfLiteMulParams* params,
       }
     } else {
       if (need_broadcast) {
-        TF_LITE_MUL(optimized_ops, BroadcastMulFivefold, float);
+        TF_LITE_MUL(optimized_ops, BroadcastMulDispatch, float);
       } else {
         TF_LITE_MUL(optimized_ops, Mul, float);
       }
@@ -186,7 +186,7 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
         }
       } else {
         if (need_broadcast) {
-          TF_LITE_MUL(optimized_integer_ops, BroadcastMulFivefold, int8_t);
+          TF_LITE_MUL(optimized_integer_ops, BroadcastMulDispatch, int8_t);
         } else {
           TF_LITE_MUL(optimized_integer_ops, Mul, int8_t);
         }
@@ -201,7 +201,7 @@ TfLiteStatus EvalQuantized(TfLiteContext* context, TfLiteNode* node,
         }
       } else {
         if (need_broadcast) {
-          TF_LITE_MUL(optimized_ops, BroadcastMulFivefold, uint8_t);
+          TF_LITE_MUL(optimized_ops, BroadcastMulDispatch, uint8_t);
         } else {
           TF_LITE_MUL(optimized_ops, Mul, uint8_t);
         }
diff --git a/tensorflow/lite/kernels/mul_test.cc b/tensorflow/lite/kernels/mul_test.cc
index de1834afa42..1762c2501e7 100644
--- a/tensorflow/lite/kernels/mul_test.cc
+++ b/tensorflow/lite/kernels/mul_test.cc
@@ -159,6 +159,55 @@ TEST(FloatMulOpTest, WithBroadcast) {
   }
 }
 
+TEST(FloatMulOpTest, MixedBroadcast) {
+  const std::vector<int> base_shape = {2, 3, 1, 2};  // 12 elements
+  std::vector<std::vector<int>> test_shapes = {  // 6 elements each
+      {1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+  std::vector<std::vector<float>> test_outputs = {  // one per test shape
+      {-0.06f, 0.69f,  0.12f,  1.15f, -0.30f, 2.07f,  0.18f,  0.15f, -0.36f,
+       0.25f,  0.90f,  0.45f,  0.16f, -0.33f, -0.32f, -0.55f, 0.80f, -0.99f,
+       0.24f,  0.84f,  -0.48f, 1.40f, 1.20f,  2.52f,  -0.32f, 0.00f, 0.64f,
+       0.00f,  -1.60f, 0.00f,  0.14f, -0.66f, -0.28f, -1.10f, 0.70f, -1.98f},
+      {-0.06f, 0.69f, -0.36f, 0.25f, 0.80f, -0.99f, 0.24f, 0.84f, 0.64f, 0.00f,
+       0.70f, -1.98f},
+      {-0.06f, 0.46f,  -0.09f, 0.69f, 0.12f,  -0.92f, 0.18f,  0.10f,  0.27f,
+       0.15f,  -0.36f, -0.20f, 0.16f, -0.22f, 0.24f,  -0.33f, -0.32f, 0.44f,
+       0.60f,  1.40f,  1.20f,  2.80f, 1.08f,  2.52f,  -0.80f, 0.00f,  -1.60f,
+       0.00f,  -1.44f, 0.00f,  0.35f, -1.10f, 0.70f,  -2.20f, 0.63f,  -1.98f},
+      {-0.06f, 0.46f, 0.27f, 0.15f, -0.32f, 0.44f, 0.60f, 1.40f, -1.60f, 0.00f,
+       0.63f, -1.98f}};
+  for (size_t i = 0; i < test_shapes.size(); ++i) {  // base_shape is input1
+    FloatMulOpModel model_fixture(
+        {TensorType_FLOAT32, base_shape}, {TensorType_FLOAT32, test_shapes[i]},
+        {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE);
+    model_fixture.PopulateTensor<float>(
+        model_fixture.input1(), {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, 1.2f,
+                                 2.8f, -1.6f, 0.0f, 0.7f, -2.2f});
+    model_fixture.PopulateTensor<float>(model_fixture.input2(),
+                                        {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f});
+    model_fixture.Invoke();
+
+    EXPECT_THAT(model_fixture.GetOutput(),
+                ElementsAreArray(ArrayFloatNear(test_outputs[i], 0.0001f)))
+        << "With shape number " << i;
+  }
+  // Re-run with exchanged inputs.
+  for (size_t i = 0; i < test_shapes.size(); ++i) {  // base_shape is input2
+    FloatMulOpModel model_fixture(
+        {TensorType_FLOAT32, test_shapes[i]}, {TensorType_FLOAT32, base_shape},
+        {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE);
+    model_fixture.PopulateTensor<float>(model_fixture.input1(),
+                                        {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f});
+    model_fixture.PopulateTensor<float>(
+        model_fixture.input2(), {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, 1.2f,
+                                 2.8f, -1.6f, 0.0f, 0.7f, -2.2f});
+    model_fixture.Invoke();
+    EXPECT_THAT(model_fixture.GetOutput(),  // same expectations as first loop
+                ElementsAreArray(ArrayFloatNear(test_outputs[i], 0.0001f)))
+        << "With shape number " << i;
+  }
+}
+
 TEST(FloatMulOpTest, WithBroadcast2Elements) {
   std::vector<std::vector<int>> test_shapes = {
       {2, 2}, {2, 1, 2}, {1, 2, 2}, {1, 2, 1, 2}};
@@ -342,6 +391,60 @@ void WithBroadcast() {
   }
 }
 
+template <enum TensorType tensor_type, typename integer_dtype>
+void QuantizedWithMixedBroadcast() {  // Quantized Mul over 4 broadcast shapes.
+  float kQuantizedTolerance = GetTolerance(-3.f, 3.f);
+  const std::vector<int> base_shape = {2, 3, 1, 2};  // 12 elements
+  std::vector<std::vector<int>> test_shapes = {  // 6 elements each
+      {1, 1, 3, 2}, {1, 3, 1, 2}, {2, 1, 3, 1}, {2, 3, 1, 1}};
+  std::vector<std::vector<float>> test_outputs = {  // one per test shape
+      {-0.06f, 0.69f,  0.12f,  1.15f, -0.30f, 2.07f,  0.18f,  0.15f, -0.36f,
+       0.25f,  0.90f,  0.45f,  0.16f, -0.33f, -0.32f, -0.55f, 0.80f, -0.99f,
+       0.24f,  0.84f,  -0.48f, 1.40f, 1.20f,  2.52f,  -0.32f, 0.00f, 0.64f,
+       0.00f,  -1.60f, 0.00f,  0.14f, -0.66f, -0.28f, -1.10f, 0.70f, -1.98f},
+      {-0.06f, 0.69f, -0.36f, 0.25f, 0.80f, -0.99f, 0.24f, 0.84f, 0.64f, 0.00f,
+       0.70f, -1.98f},
+      {-0.06f, 0.46f,  -0.09f, 0.69f, 0.12f,  -0.92f, 0.18f,  0.10f,  0.27f,
+       0.15f,  -0.36f, -0.20f, 0.16f, -0.22f, 0.24f,  -0.33f, -0.32f, 0.44f,
+       0.60f,  1.40f,  1.20f,  2.80f, 1.08f,  2.52f,  -0.80f, 0.00f,  -1.60f,
+       0.00f,  -1.44f, 0.00f,  0.35f, -1.10f, 0.70f,  -2.20f, 0.63f,  -1.98f},
+      {-0.06f, 0.46f, 0.27f, 0.15f, -0.32f, 0.44f, 0.60f, 1.40f, -1.60f, 0.00f,
+       0.63f, -1.98f}};
+  for (size_t i = 0; i < test_shapes.size(); ++i) {  // base_shape is input1
+    QuantizedMulOpModel model_fixture({tensor_type, base_shape, -3.f, 3.f},
+                                      {tensor_type, test_shapes[i], -3.f, 3.f},
+                                      {tensor_type, {}, -3.f, 3.f},
+                                      ActivationFunctionType_NONE);
+    model_fixture.QuantizeAndPopulate<integer_dtype>(
+        model_fixture.input1(), {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, 1.2f,
+                                 2.8f, -1.6f, 0.0f, 0.7f, -2.2f});
+    model_fixture.QuantizeAndPopulate<integer_dtype>(
+        model_fixture.input2(), {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f});
+    model_fixture.Invoke();
+    EXPECT_THAT(
+        model_fixture.GetDequantizedOutput<integer_dtype>(),
+        ElementsAreArray(ArrayFloatNear(test_outputs[i], kQuantizedTolerance)))
+        << "With shape number " << i;
+  }
+  // Re-run with exchanged inputs.
+  for (size_t i = 0; i < test_shapes.size(); ++i) {  // base_shape is input2
+    QuantizedMulOpModel model_fixture({tensor_type, test_shapes[i], -3.f, 3.f},
+                                      {tensor_type, base_shape, -3.f, 3.f},
+                                      {tensor_type, {}, -3.f, 3.f},
+                                      ActivationFunctionType_NONE);
+    model_fixture.QuantizeAndPopulate<integer_dtype>(
+        model_fixture.input1(), {0.2f, 0.3f, -0.4f, 0.5f, 1.0f, 0.9f});
+    model_fixture.QuantizeAndPopulate<integer_dtype>(
+        model_fixture.input2(), {-0.3f, 2.3f, 0.9f, 0.5f, 0.8f, -1.1f, 1.2f,
+                                 2.8f, -1.6f, 0.0f, 0.7f, -2.2f});
+    model_fixture.Invoke();
+    EXPECT_THAT(  // same expectations as the first loop
+        model_fixture.GetDequantizedOutput<integer_dtype>(),
+        ElementsAreArray(ArrayFloatNear(test_outputs[i], kQuantizedTolerance)))
+        << "With shape number " << i;
+  }
+}
+
 TEST(QuantizedMulOpTest, WithBroadcastUInt8) {
   WithBroadcast<TensorType_UINT8, uint8_t>();
 }
@@ -350,5 +453,13 @@ TEST(QuantizedMulOpTest, WithBroadcastInt8) {
   WithBroadcast<TensorType_INT8, int8_t>();
 }
 
+TEST(QuantizedMulOpTest, QuantizedWithMixedBroadcastUInt8) {
+  QuantizedWithMixedBroadcast<TensorType_UINT8, uint8_t>();  // uint8 case
+}
+
+TEST(QuantizedMulOpTest, QuantizedWithMixedBroadcastInt8) {
+  QuantizedWithMixedBroadcast<TensorType_INT8, int8_t>();  // int8 case
+}
+
 }  // namespace
 }  // namespace tflite
diff --git a/tensorflow/python/tpu/tensor_tracer.py b/tensorflow/python/tpu/tensor_tracer.py
index b9aec3f2e26..780d127084e 100644
--- a/tensorflow/python/tpu/tensor_tracer.py
+++ b/tensorflow/python/tpu/tensor_tracer.py
@@ -107,7 +107,7 @@ def op_priority(op_type):
     Integer value corresponding the priority of the op.
   """
   if op_type in ('Const', 'Shape', 'BroadcastGradientArgs', 'Range',
-                 'VariableShape', 'Fill', 'OneHot'):
+                 'VariableShape', 'Fill', 'OneHot', 'ShapeN'):
     # Lowest priority ops, e.g., constant ops accross different steps,
     # They will be traced only if trace_level>=7
     return 7
diff --git a/tensorflow/python/tpu/tpu.py b/tensorflow/python/tpu/tpu.py
index 9aa1fc5ef5b..f96aad69632 100644
--- a/tensorflow/python/tpu/tpu.py
+++ b/tensorflow/python/tpu/tpu.py
@@ -649,7 +649,15 @@ def outside_compilation(computation, *args, **kwargs):
   # we need to attach _xla_outside_compilation attribute directly because we are
   # not in TPUReplicateContext.
   if isinstance(graph, func_graph.FuncGraph):
-    tpu_context, _ = _enclosing_tpu_context_and_graph()
+    try:
+      tpu_context, _ = _enclosing_tpu_context_and_graph()
+    except ValueError:
+      logging.warning(
+          "Outside compilation attempted outside TPUReplicateContext "
+          "scope. As no enclosing TPUReplicateContext can be found, "
+          "returning the result of `computation` as is.")
+      return computation(*args, **kwargs)
+
     # pylint: disable=protected-access
     outside_compilation_name = str(tpu_context._outside_compilation_counter)
     tpu_context._outside_compilation_counter = (
diff --git a/tensorflow/tools/ci_build/release/common.sh b/tensorflow/tools/ci_build/release/common.sh
index 5fd121fdd7b..7b273fbfed1 100644
--- a/tensorflow/tools/ci_build/release/common.sh
+++ b/tensorflow/tools/ci_build/release/common.sh
@@ -78,8 +78,9 @@ function update_bazel_linux {
   popd
 
   PATH="/home/kbuilder/bin:$PATH"
-
   set_bazel_outdir
+  which bazel
+  bazel version
 }
 # LINT.ThenChange(
 #   //tensorflow_estimator/google/kokoro/common.sh)
@@ -99,6 +100,9 @@ function update_bazel_macos {
   run_with_retry "${BAZEL_COMMAND}"
   # Add new bazel installation to path
   PATH="/Users/kbuilder/bin:$PATH"
+  set_bazel_outdir
+  which bazel
+  bazel version
 }
 
 function install_pip2 {